Click here to Skip to main content
15,878,049 members
Please Sign up or sign in to vote.
5.00/5 (1 vote)
Hello,

I've noticed that whenever you write a program in Visual Studio that uses structs (or classes), the assembly code generated doesn't contain a single MASM STRUCTURE, even though it would work perfectly fine. Instead, VC++ does something I cannot quite understand, but would love an explanation of. But my question is this: why doesn't it use assembly STRUCTs? Is it some kind of performance or program size matter? I doubt it's the latter, since both my code and my exe size were a lot smaller.

Let me give you an example. This is the C++ program we're asking VC++ to compile:
C++
#include <cstdio>
#include <cmath>

struct MyType
{
    int age;
};

const char* sz = "I am %d years old\n";

int main()
{
    MyType obj;               // automatic object; the generated listing addresses it as _obj$[ebp]
    obj.age = 30;             // the only member, so this store covers the whole 4-byte object
    printf(sz, obj.age);      // sz points at "I am %d years old\n"
}                             // no explicit return; the generated listing zeroes eax before ret



And this is the assembly code it generates for us. Notice that it doesn't use something like MyType STRUCT anywhere; instead it simply does this, which makes no sense to me at all:
ASM
_TEXT	SEGMENT
_obj$ = -4						; size = 4
... (ca. 13 lines later)
mov	 DWORD PTR _obj$[ebp], 30 ; bytes=7, 0000001eH


This is the complete code generated by Visual Studio:
ASM
; Listing generated by Microsoft (R) Optimizing Compiler Version 19.28.29336.0 

	TITLE	C:\Users\Thomas\source\C#\MultiCompiler2\bin\Debug\program1.cpp
	.686P
	.XMM
	include listing.inc
	.model	flat

INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES

PUBLIC	?sz@@3PBDB					; sz
CONST	SEGMENT
$SG17424 DB	'I am %d years old', 0aH, 00H
CONST	ENDS
_DATA	SEGMENT
?sz@@3PBDB DD	FLAT:$SG17424				; sz
_DATA	ENDS
PUBLIC	___local_stdio_printf_options
PUBLIC	__vfprintf_l
PUBLIC	_printf
PUBLIC	_main
PUBLIC	?_OptionsStorage@?1??__local_stdio_printf_options@@9@4_KA ; `__local_stdio_printf_options'::`2'::_OptionsStorage
EXTRN	___acrt_iob_func:PROC
EXTRN	___stdio_common_vfprintf:PROC
;	COMDAT ?_OptionsStorage@?1??__local_stdio_printf_options@@9@4_KA
_BSS	SEGMENT
?_OptionsStorage@?1??__local_stdio_printf_options@@9@4_KA DQ 01H DUP (?) ; `__local_stdio_printf_options'::`2'::_OptionsStorage
_BSS	ENDS
; Function compile flags: /Odtp
; File C:\Users\Thomas\source\C#\MultiCompiler2\bin\Debug\program1.cpp
; main(): stores 30 in the 4-byte local `obj`, calls _printf with the pointer
; held in `sz` plus that value, and leaves 0 in eax when it returns.
_TEXT	SEGMENT
; `_obj$` is not a variable. It is an assembly-time constant holding the
; displacement of `obj` from ebp, so `_obj$[ebp]` addresses [ebp-4]
; (visible as the `fc` displacement byte in the encodings below).
_obj$ = -4						; size = 4
_main	PROC

; 12   : {

; Frame setup: save the caller's ebp, then make ebp the base of this frame.
  00000	55		 push	 ebp			; bytes=1
  00001	8b ec		 mov	 ebp, esp		; bytes=2
; This push moves esp down to ebp-4, which makes room for `obj`. The value it
; writes is overwritten by the store at offset 00004.
  00003	51		 push	 ecx			; bytes=1

; 13   :     MyType obj;
; 14   :     obj.age = 30;

; `age` is the only member, so it lives at the object's own address [ebp-4].
  00004	c7 45 fc 1e 00
	00 00		 mov	 DWORD PTR _obj$[ebp], 30 ; bytes=7, 0000001eH

; 15   :     printf(sz, obj.age);

; Arguments are pushed last-to-first: obj.age, then the pointer stored in sz.
  0000b	8b 45 fc	 mov	 eax, DWORD PTR _obj$[ebp] ; bytes=3
  0000e	50		 push	 eax			; bytes=1
  0000f	8b 0d 00 00 00
	00		 mov	 ecx, DWORD PTR ?sz@@3PBDB ; bytes=6, sz
  00015	51		 push	 ecx			; bytes=1
  00016	e8 00 00 00 00	 call	 _printf			; bytes=5
; The caller removes the two 4-byte arguments it pushed.
  0001b	83 c4 08	 add	 esp, 8			; bytes=3

; 16   : }

; eax = 0, then tear the frame down: esp back to the frame base (discarding
; `obj`) and restore the caller's ebp.
  0001e	33 c0		 xor	 eax, eax		; bytes=2
  00020	8b e5		 mov	 esp, ebp		; bytes=2
  00022	5d		 pop	 ebp			; bytes=1
  00023	c3		 ret	 0			; bytes=1
_main	ENDP
_TEXT	ENDS
; Function compile flags: /Odtp
; File C:\Program Files (x86)\Windows Kits\10\Include\10.0.18362.0\ucrt\stdio.h
;	COMDAT _printf
; printf (stdio.h lines 954-961 per the source-line markers below): builds a
; pointer to the arguments that follow _Format and forwards everything to
; __vfprintf_l, using the value returned by ___acrt_iob_func(1) as the stream.
_TEXT	SEGMENT
; Each symbol is an ebp-relative displacement. Negative values are the four
; 4-byte local/temporary slots reserved by `sub esp, 16`; __Format$ = 8 is the
; first stack argument (saved ebp sits at [ebp], the return address at [ebp+4]).
__Result$ = -16						; size = 4
tv73 = -12						; size = 4
tv75 = -8						; size = 4
__ArgList$ = -4						; size = 4
__Format$ = 8						; size = 4
_printf	PROC						; COMDAT

; 954  :     {

  00000	55		 push	 ebp			; bytes=1
  00001	8b ec		 mov	 ebp, esp		; bytes=2
  00003	83 ec 10	 sub	 esp, 16			; bytes=3, 00000010H

; 955  :         int _Result;
; 956  :         va_list _ArgList;
; 957  :         __crt_va_start(_ArgList, _Format);

; eax = ebp+12, the address of the stack slot immediately after _Format
; (displacement byte 0c); that address is stored as _ArgList.
  00006	8d 45 0c	 lea	 eax, DWORD PTR __Format$[ebp+4] ; bytes=3
  00009	89 45 fc	 mov	 DWORD PTR __ArgList$[ebp], eax ; bytes=3

; 958  :         _Result = _vfprintf_l(stdout, _Format, NULL, _ArgList);

; _Format is parked in temporary tv75 and the result of ___acrt_iob_func(1)
; (the `stdout` operand of source line 958) in temporary tv73.
  0000c	8b 4d 08	 mov	 ecx, DWORD PTR __Format$[ebp] ; bytes=3
  0000f	89 4d f8	 mov	 DWORD PTR tv75[ebp], ecx ; bytes=3
  00012	6a 01		 push	 1			; bytes=2
  00014	e8 00 00 00 00	 call	 ___acrt_iob_func	; bytes=5
  00019	83 c4 04	 add	 esp, 4			; bytes=3
  0001c	89 45 f4	 mov	 DWORD PTR tv73[ebp], eax ; bytes=3
; Push the four arguments last-to-first: _ArgList, 0 (NULL), _Format, stream.
  0001f	8b 55 fc	 mov	 edx, DWORD PTR __ArgList$[ebp] ; bytes=3
  00022	52		 push	 edx			; bytes=1
  00023	6a 00		 push	 0			; bytes=2
  00025	8b 45 f8	 mov	 eax, DWORD PTR tv75[ebp] ; bytes=3
  00028	50		 push	 eax			; bytes=1
  00029	8b 4d f4	 mov	 ecx, DWORD PTR tv73[ebp] ; bytes=3
  0002c	51		 push	 ecx			; bytes=1
  0002d	e8 00 00 00 00	 call	 __vfprintf_l		; bytes=5
; Remove the four 4-byte arguments, then keep the call's result in _Result.
  00032	83 c4 10	 add	 esp, 16			; bytes=3, 00000010H
  00035	89 45 f0	 mov	 DWORD PTR __Result$[ebp], eax ; bytes=3

; 959  :         __crt_va_end(_ArgList);

; In this build __crt_va_end compiles to storing 0 into _ArgList.
  00038	c7 45 fc 00 00
	00 00		 mov	 DWORD PTR __ArgList$[ebp], 0 ; bytes=7

; 960  :         return _Result;

; The return value is handed back in eax.
  0003f	8b 45 f0	 mov	 eax, DWORD PTR __Result$[ebp] ; bytes=3

; 961  :     }

  00042	8b e5		 mov	 esp, ebp		; bytes=2
  00044	5d		 pop	 ebp			; bytes=1
  00045	c3		 ret	 0			; bytes=1
_printf	ENDP
_TEXT	ENDS
; Function compile flags: /Odtp
; File C:\Program Files (x86)\Windows Kits\10\Include\10.0.18362.0\ucrt\stdio.h
;	COMDAT __vfprintf_l
; _vfprintf_l (stdio.h lines 642-644 per the source-line markers below):
; forwards its four arguments, preceded by the 64-bit options value read
; through the pointer returned by ___local_stdio_printf_options, to
; ___stdio_common_vfprintf and returns that call's eax unchanged.
_TEXT	SEGMENT
; All four symbols are positive ebp displacements, i.e. incoming stack
; arguments; this function reserves no local slots.
__Stream$ = 8						; size = 4
__Format$ = 12						; size = 4
__Locale$ = 16						; size = 4
__ArgList$ = 20						; size = 4
__vfprintf_l PROC					; COMDAT

; 642  :     {

  00000	55		 push	 ebp			; bytes=1
  00001	8b ec		 mov	 ebp, esp		; bytes=2

; 643  :         return __stdio_common_vfprintf(_CRT_INTERNAL_LOCAL_PRINTF_OPTIONS, _Stream, _Format, _Locale, _ArgList);

; Push the last four arguments in reverse order: _ArgList, _Locale, _Format,
; _Stream.
  00003	8b 45 14	 mov	 eax, DWORD PTR __ArgList$[ebp] ; bytes=3
  00006	50		 push	 eax			; bytes=1
  00007	8b 4d 10	 mov	 ecx, DWORD PTR __Locale$[ebp] ; bytes=3
  0000a	51		 push	 ecx			; bytes=1
  0000b	8b 55 0c	 mov	 edx, DWORD PTR __Format$[ebp] ; bytes=3
  0000e	52		 push	 edx			; bytes=1
  0000f	8b 45 08	 mov	 eax, DWORD PTR __Stream$[ebp] ; bytes=3
  00012	50		 push	 eax			; bytes=1
; eax = pointer returned by ___local_stdio_printf_options. The 8-byte value it
; points at is pushed as two dwords: the one at [eax+4] first, then [eax].
  00013	e8 00 00 00 00	 call	 ___local_stdio_printf_options ; bytes=5
  00018	8b 48 04	 mov	 ecx, DWORD PTR [eax+4]	; bytes=3
  0001b	51		 push	 ecx			; bytes=1
  0001c	8b 10		 mov	 edx, DWORD PTR [eax]	; bytes=2
  0001e	52		 push	 edx			; bytes=1
  0001f	e8 00 00 00 00	 call	 ___stdio_common_vfprintf ; bytes=5
; 24 bytes = the six dwords pushed above.
  00024	83 c4 18	 add	 esp, 24			; bytes=3, 00000018H

; 644  :     }

; esp already equals ebp here (no locals), so only ebp needs restoring.
  00027	5d		 pop	 ebp			; bytes=1
  00028	c3		 ret	 0			; bytes=1
__vfprintf_l ENDP
_TEXT	ENDS
; Function compile flags: /Odtp
; File C:\Program Files (x86)\Windows Kits\10\Include\10.0.18362.0\ucrt\corecrt_stdio_config.h
;	COMDAT ___local_stdio_printf_options
; __local_stdio_printf_options (corecrt_stdio_config.h lines 86-89 per the
; source-line markers below): returns, in eax, the address of its
; function-local static `_OptionsStorage`.
_TEXT	SEGMENT
___local_stdio_printf_options PROC			; COMDAT

; 86   :     {

  00000	55		 push	 ebp			; bytes=1
  00001	8b ec		 mov	 ebp, esp		; bytes=2

; 87   :         static unsigned __int64 _OptionsStorage;
; 88   :         return &_OptionsStorage;

; OFFSET yields the address of the symbol, not its contents; the four zero
; bytes in the encoding are a placeholder for that address.
  00003	b8 00 00 00 00	 mov	 eax, OFFSET ?_OptionsStorage@?1??__local_stdio_printf_options@@9@4_KA ; bytes=5, `__local_stdio_printf_options'::`2'::_OptionsStorage

; 89   :     }

  00008	5d		 pop	 ebp			; bytes=1
  00009	c3		 ret	 0			; bytes=1
___local_stdio_printf_options ENDP
_TEXT	ENDS
END


That assembly code became a 99,5 KB (101.888 bytes) executable.
Now, when I compile my own assembly code, which does exactly the same if we're focusing on what the C++ code does, I get a 1,5 KB executable:
ASM
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
    include \masm32\include\masm32rt.inc
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤


MyType STRUCT
    age DWORD 0
MyType ENDS

.data
    sz db "I am %d years old", 10, 0

.code

; Program entry point: `end start` at the bottom of the file names this label.
start:
   
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    call main                   ; run the hand-translated equivalent of the C++ main()
    inkey                       ; not defined in this file; presumably a masm32rt.inc macro that waits for a key press - confirm
    exit                        ; not defined in this file; presumably a masm32rt.inc macro that ends the process - confirm

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

; ---------------------------------------------------------------------------
; main - hand-written equivalent of the C++ main(): stores 30 in the `age`
;        field of a stack-allocated MyType and prints it with crt_printf.
; In:    nothing
; Out:   eax = whatever crt_printf leaves there (the caller at `start`
;        ignores it)
; Stack: one MyType local, declared with LOCAL; two DWORD arguments are
;        pushed for the call and removed again right after it.
; ---------------------------------------------------------------------------
main proc
    LOCAL obj: MyType               ; stack-allocated instance, addressed by field name below

    mov     obj.age, 30

    cls                             ; not defined in this file; presumably the masm32rt.inc clear-screen macro - confirm
    push    obj.age                 ; 2nd argument: value for %d
    push    OFFSET sz               ; 1st argument: address of the format string
    call    crt_printf
    add     esp, 8                  ; the callee does not pop its arguments (C calling convention), so remove
                                    ; both DWORDs here, as the compiler does after its own `call _printf`

    ret

main endp

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

end start


How can it accomplish the same by doing _obj$ = -4? Is it because I only use one object and it knows my program isn't truly object oriented?

I'd love to know!
Thank you in advance.

What I have tried:

I've tried figuring out how it works. If you're making an object oriented language that translates to MASM, wouldn't it be a good idea to instead create a STRUCT for all structs/classes in the original language? That way you could put methods (since functions cannot be in STRUCTS in MASM) in the global scope and give them an extra "this" parameter that would be invisible in the original language, which would be of e.g. type MyType. That's the kind of logic I've tried to find in the asm code, but the way VC++ does it doesn't even come close AFAIK.
Posted
Updated 29-Oct-21 10:42am

1 solution

Assembly does not have structs. MASM does but it is its own interpretation of Assembly. Indeed the name itself means Macro ASseMbler and STRUCT is one of these macros, provided to help the developers.

This kind of additional keyword is normal across all assemblers.
 
Share this answer
 
Comments
deXo-fan 26-Oct-21 18:00pm    
I see. I don't know why, but I thought it stood for Microsoft Assembler. Do you know how they keep track of objects and their members without STRUCTs? Like that line I talked about:

_TEXT SEGMENT
_obj$ = -4 ; size = 4
... (ca. 13 lines later)
mov DWORD PTR _obj$[ebp], 30 ; bytes=7, 0000001eH

What's going on here? It looks to me like they're setting an integer variable to -4, but then later on accessing that same variable as though it were an array or something similar.
den2k88 27-Oct-21 3:17am    
They don't, assembler is a direct translation of machine code, it works at the machine level. At that level, memory is flat, there are only addresses. Every kind of logic or structure is added by the programmer, in Assembly, or by the language when using high level languages.

A variable is simply the content of a memory location, so is a member of a structure, the address of a method, everything. What do they mean in context is decided by the language constructs and the programmer's logic.

Down to the assembly there are no objects, no methods, no variables. Only a flat area of memory. Methods are functions, functions are lists of assembly instructions.

As for that snippet, it *should* (I am a bit rusty in MASM) access the stack (EBP register points at the end of the stack) 4 bytes before its start, which should be the location of the return value in function calls. It is not properly an array but raw pointer arithmetic in raw memory.
deXo-fan 27-Oct-21 18:55pm    
Thank you for sharing your knowledge with me, den2k88! (I don't know what else to call you). I really appreciate it. I have a couple more questions if you don't mind answering them.

About STRUCT being a macro, does this mean that the MASM32 Assembler converts that code into something that looks like the ebp snippet?

And whenever you make an object in C(++) that is a struct/class, does the compiler simply treat every member of every struct as global variables? Also, how does malloc() fit into this way of keeping track of things?

Does ebp always point at the end of the stack? Why 4 bytes before its start? Does that mean 4 bytes before the end of the stack? I thought eax was the primary/preferred register to put return values into.
den2k88 28-Oct-21 3:39am    
> About STRUCT being a macro, does this mean that the MASM32 Assembler converts that code into something that looks like the ebp snippet?

Yes, there should be the compilation option to show the expanded code before the 1:1 translation in machine code.

> And whenever you make an object in C(++) that is a struct/class, does the compiler simply treat every member of every struct as global variables?

Yes and no, usually global variables are placed in a separate segment of memory while local variables are always present only on the current stack. Also everything created with malloc() or new lives in a different area of memory, called heap, which is managed by the OS through low level APIs.

> Also, how does malloc() fit into this way of keeping track of things?

It uses an external memory segment which is "unlimited", the size and location of which is actually managed by the OS. There are OSes out there without heap, therefore without malloc and new - mostly for microcontrollers.

> Does ebp always point at the end of the stack?

Yes, in x86 and derived architectures EBP is a special purpose register which is also used by some CPU instructions. You may repurpose it but you have to restore its original content before returning from the function if you want the program to work - attacking EBP is (was) a good way to execute exploits, the dreaded stack overflow attack.

> Why 4 bytes before its start? Does that mean 4 bytes before the end of the stack? I thought eax was the primary/preferred register to put return values into.

It depends on the calling convention of the function, which defines where the function is expecting its arguments, where it stores its return value and who is responsible for cleaning up the function stack. The conventions I know of are cdecl, stdcall and fastcall. The one you refer to with eax as return register should be fastcall if I remember correctly.

Also I may have not been completely accurate on what the snippet does: watching more thoroughly it should be placing the argument of the printf 4 bytes before the current stack pointer, but IIRC the stack pointer grows "upwards", from high address to low address.

Consider that I haven't been actively developing in x86 assembler for half a decade, so while I'm fairly sure the gist of my answer is correct I probably misplaced some explanation, especially since you're asking more and more in-depth questions ;P
deXo-fan 29-Oct-21 16:05pm    
Well, I can tell you this: I had only read about halfway through your answer when I thought to myself, "This person is a goldmine of information!". Seriously, I've learned A LOT from you, and I immensely appreciate you taking the time to answer all my questions so thoroughly and in such detail! Thank you! I take it as a compliment that you think I'm asking more and more in-depth questions, sort of like "You're learning" ;), and that's also a positive reflection on you since I probably wouldn't be asking these questions if I didn't understand your answers.

But I don't want to keep you if you are tired of all my questions, which would be quite understandable. However, if you're not then I have just a couple more:

1) Do you have to set EBP manually to point to the end of the stack at the beginning of every function, and is that what this code does? (These are the first 3 lines of a function I wrote):
push ebp ; bytes=1
mov ebp, esp ; bytes=2
push ecx ; bytes=1
Why is it necessary to push ebp before moving esp into it? Does the compiler know somehow that it has a necessary value in it already before entering the function? Aren't we repurposing it by doing this? And by restoring EBP, is that what we're doing here, which is the last lines of the function?
mov esp, ebp ; bytes=2
pop ebp ; bytes=1

This is the entire code for the function:
; 13   : void f(char& a, char& b) {

  00000	55		 push	 ebp			; bytes=1
  00001	8b ec		 mov	 ebp, esp		; bytes=2
  00003	51		 push	 ecx			; bytes=1

; 14   :     char A = a;

  00004	8b 45 08	 mov	 eax, DWORD PTR _a$[ebp]	; bytes=3
  00007	8a 08		 mov	 cl, BYTE PTR [eax]	; bytes=2
  00009	88 4d ff	 mov	 BYTE PTR _A$[ebp], cl	; bytes=3

; 15   :     a = b;

  0000c	8b 55 08	 mov	 edx, DWORD PTR _a$[ebp]	; bytes=3
  0000f	8b 45 0c	 mov	 eax, DWORD PTR _b$[ebp]	; bytes=3
  00012	8a 08		 mov	 cl, BYTE PTR [eax]	; bytes=2
  00014	88 0a		 mov	 BYTE PTR [edx], cl	; bytes=2

; 16   :     b = A;

  00016	8b 55 0c	 mov	 edx, DWORD PTR _b$[ebp]	; bytes=3
  00019	8a 45 ff	 mov	 al, BYTE PTR _A$[ebp]	; bytes=3
  0001c	88 02		 mov	 BYTE PTR [edx], al	; bytes=2

; 17   : }

  0001e	8b e5		 mov	 esp, ebp		; bytes=2
  00020	5d		 pop	 ebp			; bytes=1
  00021	c3		 ret	 0			; bytes=1
?f@@YAXAAD0@Z ENDP					; f


2) Can you recommend a good book on learning Assembly, preferably MASM32?

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900