entry: C compilation
This commit is contained in:
parent
6df71ccdcd
commit
ecc1117fae
1 changed files with 203 additions and 0 deletions
203
zk/C_compilation_process.md
Normal file
203
zk/C_compilation_process.md
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
---
|
||||
tags:
|
||||
- C
|
||||
---
|
||||
|
||||
# The C compilation process
|
||||
|
||||
C code is compiled to a binary executable in four stages:
|
||||
|
||||
1. Preprocessing
|
||||
2. Compilation
|
||||
3. Assembly
|
||||
4. Linking
|
||||
|
||||
To demonstrate the output at the different stages I will compile the following
|
||||
simple program:
|
||||
|
||||
```c
|
||||
#include <stdio.h>
|
||||
|
||||
int main (void)
|
||||
{
|
||||
printf("Hello world");
|
||||
}
|
||||
```
|
||||
|
||||
For standard compilation when you don't need to see all the interim stages, you
|
||||
just run the following in your source directory:
|
||||
|
||||
```sh
|
||||
gcc main.c
|
||||
```
|
||||
|
||||
This generates:
|
||||
|
||||
```
|
||||
a.out main.c
|
||||
```
|
||||
|
||||
`a.out` is the executable binary.
|
||||
|
||||
To run this code:
|
||||
|
||||
```sh
|
||||
./a.out
|
||||
```
|
||||
|
||||
To compile to a specified file name:
|
||||
|
||||
```sh
|
||||
gcc -o hello_world main.c
|
||||
```
|
||||
|
||||
Then to run:
|
||||
|
||||
```
|
||||
./hello_world
|
||||
```
|
||||
|
||||
## Preprocessing
|
||||
|
||||
The preprocessor finds all directives starting with `#`, such as header file
|
||||
`include` statements, and replaces them with the content they refer to in your source code.
|
||||
|
||||
View this with:
|
||||
|
||||
```sh
|
||||
gcc -E main.c
|
||||
```
|
||||
|
||||
Here is an example for my script:
|
||||
|
||||
```
|
||||
extern char *ctermid (char *__s) __attribute__ ((__nothrow__ , __leaf__))
|
||||
__attribute__ ((__access__ (__write_only__, 1)));
|
||||
# 931 "/usr/include/stdio.h" 3 4
|
||||
extern void flockfile (FILE *__stream) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1)));
|
||||
|
||||
|
||||
|
||||
extern int ftrylockfile (FILE *__stream) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1)));
|
||||
|
||||
|
||||
extern void funlockfile (FILE *__stream) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1)));
|
||||
# 949 "/usr/include/stdio.h" 3 4
|
||||
extern int __uflow (FILE *);
|
||||
extern int __overflow (FILE *, int);
|
||||
# 973 "/usr/include/stdio.h" 3 4
|
||||
|
||||
# 2 "main.c" 2
|
||||
|
||||
|
||||
# 3 "main.c"
|
||||
int main(void) { printf("Hello world"); }
|
||||
```
|
||||
|
||||
## Compilation
|
||||
|
||||
Takes the pre-processed source code and translates it into assembly language for
|
||||
your target architecture.
|
||||
|
||||
At this stage your code is checked by the compiler for syntax errors and
|
||||
optimised.
|
||||
|
||||
The result is human-readable assembly in a file called `main.s`.
|
||||
|
||||
Create this with:
|
||||
|
||||
```sh
|
||||
gcc -S main.c
|
||||
```
|
||||
|
||||
Here is the output for my script:
|
||||
|
||||
```
|
||||
.file "main.c"
|
||||
.text
|
||||
.section .rodata
|
||||
.LC0:
|
||||
.string "Hello world"
|
||||
.text
|
||||
.globl main
|
||||
.type main, @function
|
||||
main:
|
||||
.LFB0:
|
||||
.cfi_startproc
|
||||
pushq %rbp
|
||||
.cfi_def_cfa_offset 16
|
||||
.cfi_offset 6, -16
|
||||
movq %rsp, %rbp
|
||||
.cfi_def_cfa_register 6
|
||||
leaq .LC0(%rip), %rax
|
||||
movq %rax, %rdi
|
||||
movl $0, %eax
|
||||
call printf@PLT
|
||||
movl $0, %eax
|
||||
popq %rbp
|
||||
.cfi_def_cfa 7, 8
|
||||
ret
|
||||
.cfi_endproc
|
||||
.LFE0:
|
||||
.size main, .-main
|
||||
.ident "GCC: (GNU) 15.2.1 20250813"
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
```
|
||||
|
||||
## Assembly
|
||||
|
||||
The assembly language code is converted into machine code. The output is an
|
||||
**object file** (`.o`) which contains the machine code but is not yet
|
||||
executable because it is not yet linked to the functions and variables that come
|
||||
from imported code. Your object file is not yet combined with the object files
|
||||
of the libraries and resources you have used.
|
||||
|
||||
Create just the object file with:
|
||||
|
||||
```sh
|
||||
gcc -c main.c
|
||||
```
|
||||
|
||||
As it is a binary file it is not human-readable. However, you can use `objdump` to
|
||||
view a more intelligible representation of the output.
|
||||
|
||||
```sh
|
||||
objdump -dS main.o
|
||||
```
|
||||
|
||||
```
|
||||
main.o: file format elf64-x86-64
|
||||
|
||||
|
||||
Disassembly of section .text:
|
||||
|
||||
0000000000000000 <main>:
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) { printf("Hello world"); }
|
||||
0: 55 push %rbp
|
||||
1: 48 89 e5 mov %rsp,%rbp
|
||||
4: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # b <main+0xb>
|
||||
b: 48 89 c7 mov %rax,%rdi
|
||||
e: b8 00 00 00 00 mov $0x0,%eax
|
||||
13: e8 00 00 00 00 call 18 <main+0x18>
|
||||
18: b8 00 00 00 00 mov $0x0,%eax
|
||||
1d: 5d pop %rbp
|
||||
1e: c3 ret
|
||||
```
|
||||
|
||||
To break this down:
|
||||
|
||||
- Left: the offset (address within this object file)
|
||||
- Middle: the actual bytes - this is what the CPU reads and executes
|
||||
- Right: the human-readable assembly, which is just a translation of those bytes
|
||||
|
||||
> The assembly here is different to the assembly earlier in `main.s`. This is
|
||||
> because this time it is being interpreted after it has been written to machine
|
||||
> code. It's a translation back from machine code to assembly.
|
||||
|
||||
## Linking
|
||||
|
||||
In the final stage the object files are combined, resolving all the references
|
||||
between them. The result of this stage will be the `a.out` file mentioned
|
||||
earlier.
|
||||
Loading…
Add table
Reference in a new issue