aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJohannes Stoelp <johannes.stoelp@gmail.com>2025-03-30 01:46:26 +0100
committerJohannes Stoelp <johannes.stoelp@gmail.com>2025-03-30 01:51:58 +0100
commitab04f8876eef5e9da79573368d440da067293c2e (patch)
tree287482f68f0ac49317adbb0f4b7c27c7f9e81cd4
parent916b73bee95494c205ba67e4a50e6a525afc3a3c (diff)
downloadnotes-ab04f8876eef5e9da79573368d440da067293c2e.tar.gz
notes-ab04f8876eef5e9da79573368d440da067293c2e.zip
ld: linker script example
-rw-r--r--src/SUMMARY.md1
-rw-r--r--src/development/README.md1
-rw-r--r--src/development/ld.md123
-rw-r--r--src/development/ld/Makefile15
-rw-r--r--src/development/ld/data.S23
-rw-r--r--src/development/ld/link-mem.ld45
-rw-r--r--src/development/ld/link-nomem.ld43
7 files changed, 251 insertions, 0 deletions
diff --git a/src/SUMMARY.md b/src/SUMMARY.md
index 2daa08a..ac41b1a 100644
--- a/src/SUMMARY.md
+++ b/src/SUMMARY.md
@@ -73,6 +73,7 @@
- [glibc](./development/glibc.md)
- [gcc](./development/gcc.md)
- [gas](./development/gas.md)
+ - [ld](./development/ld.md)
- [git](./development/git.md)
- [cmake](./development/cmake.md)
- [make](./development/make.md)
diff --git a/src/development/README.md b/src/development/README.md
index 437528d..eb3dbf5 100644
--- a/src/development/README.md
+++ b/src/development/README.md
@@ -5,6 +5,7 @@
- [glibc](./glibc.md)
- [gcc](./gcc.md)
- [gas](./gas.md)
+- [ld](./ld.md)
- [git](./git.md)
- [cmake](./cmake.md)
- [make](./make.md)
diff --git a/src/development/ld.md b/src/development/ld.md
new file mode 100644
index 0000000..330395f
--- /dev/null
+++ b/src/development/ld.md
@@ -0,0 +1,123 @@
+# ld(1)
+
+```
+ld [opts] files...
+ -T <script> use <script> as linker script
+ --trace report each file the linker touches
+ --start-group archives --end-group
+ search archives repeatedly until no new
+ undefined references are created
+ (e.g. helpful with a list of static libraries)
+```
+
+## Linker Script
+
+`output` sections are defined as follows (full description at [output
+section][ld-out] and [input section][ld-in]).
+
+```
+section_name [vaddr] : [AT(paddr)] {
+ file_pattern (section_pattern)
+}
+```
+
+The following gives an example of an `output` section with two `input` section rules.
+```
+.foo : {
+ abc.o (.foo)
+ *.o (.foo.*)
+}
+```
+
+### Example: virtual vs physical (load) address
+
+Sometimes code is initially stored at a different location than where it runs.
+For example in embedded systems, code may initially reside in a _rom_ and
+startup code will copy a section with writable _data_ into _ram_. Code accessing
+the writable data then accesses the data in the _ram_.
+
+In this case we need different addresses for the same data.
+- The `virtual` or _runtime_ address, this is the address used when the linker
+ resolves accesses to the data. Hence, this is the address the data will have
+ when the code is running.
+- The `physical` or _load_ address, this is the address the data is stored at
+ initially. Startup code typically copies the initial values from the
+ `physical` to the `virtual` address.
+
+The following shows an example linker script which uses _virtual_ and _physical_
+addresses. The full source files can be found [here][src].
+
+```
+{{#include ld/link-nomem.ld}}
+```
+
+We can use the following assembly snippet to explore the linker script.
+
+```x86asm
+{{#include ld/data.S}}
+```
+> `gcc -c data.S && ld -o link-nomem -T link-nomem.ld data.o`
+
+The elf load segments show the difference in _physical_ and _virtual_ address
+for the segment containing the `.data` section.
+```sh
+> readelf -W -l link-nomem
+# There are 4 program headers, starting at offset 64
+#
+# Program Headers:
+# Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
+# LOAD 0x001100 0x0000000000800100 0x0000000000100000 0x000020 0x000020 RW 0x1000
+# LOAD 0x002000 0x0000000000800000 0x0000000000800000 0x000018 0x000018 R E 0x1000
+# LOAD 0x003000 0x0000000000804000 0x0000000000804000 0x000004 0x000004 R 0x1000
+# LOAD 0x000000 0x0000000000805000 0x0000000000805000 0x000000 0x001000 RW 0x1000
+#
+# Section to Segment mapping:
+# Segment Sections...
+# 00 .data
+# 01 .text
+# 02 .rodata
+# 03 .stack
+```
+
+Startup code could copy data from `_data_paddr` to `_data_vaddr`.
+```sh
+> nm link-nomem
+# 0000000000800100 d asm_array
+# 0000000000804000 r asm_len
+# 0000000000100000 a _data_paddr
+# 0000000000800100 d _data_vaddr
+# 0000000000800000 T _entry
+# 0000000000806000 b _stack_top
+```
+
+The linker resolves symbols to their _virtual_ address; this can be seen in the
+access to the `asm_array` variable.
+
+```sh
+> objdump -d link-nomem
+# Disassembly of section .text:
+#
+# 0000000000800000 <_entry>:
+# 800000: 48 c7 c4 00 60 80 00 mov $0x806000,%rsp
+# 800007: 48 c7 c0 00 01 80 00 mov $0x800100,%rax ;; mov $asm_array, %rax
+# 80000e: 8b 04 25 00 40 80 00 mov 0x804000,%eax
+# 800015: f4 hlt
+# 800016: eb e8 jmp 800000 <_entry>
+```
+
+The following linker script shows an example with the `MEMORY` command.
+```
+{{#include ld/link-mem.ld}}
+```
+
+
+## References
+- [ld manual][ld]
+- [ld script: input sections][ld-in]
+- [ld script: output sections][ld-out]
+- [notes/ld example files][src]
+
+[ld]: https://sourceware.org/binutils/docs/ld/
+[ld-in]: https://sourceware.org/binutils/docs/ld/Input-Section.html
+[ld-out]: https://sourceware.org/binutils/docs/ld/Output-Section-Attributes.html
+[src]: https://github.com/johannst/notes/tree/master/src/development/ld
diff --git a/src/development/ld/Makefile b/src/development/ld/Makefile
new file mode 100644
index 0000000..16869a8
--- /dev/null
+++ b/src/development/ld/Makefile
@@ -0,0 +1,15 @@
+show-mem:
+show-nomem:
+show-%: link-%
+ readelf -W -S -l $^
+ nm $^
+ objdump -d $^
+
+link-%: link-%.ld data.o
+ ld -o $@ -T $^
+
+%.o: %.S
+ gcc -c $^
+
+clean: # remove the binaries linked by the link-% rule and all object files
+ $(RM) link-mem link-nomem *.o
diff --git a/src/development/ld/data.S b/src/development/ld/data.S
new file mode 100644
index 0000000..d76adcf
--- /dev/null
+++ b/src/development/ld/data.S
@@ -0,0 +1,23 @@
+.section .text, "ax", @progbits
+.global _entry
+_entry:
+ mov $_stack_top, %rsp
+ mov $asm_array, %rax
+ mov (asm_len), %eax
+
+ hlt
+ jmp _entry
+
+.section .data.asm, "aw", @progbits
+asm_array:
+ .4byte 0xa
+ .4byte 0xb
+ .4byte 0xc
+ .4byte 0xd
+.rept 4
+ .4byte 0xff
+.endr
+
+.section .rodata.asm, "a", @progbits
+asm_len:
+ .4byte 8
diff --git a/src/development/ld/link-mem.ld b/src/development/ld/link-mem.ld
new file mode 100644
index 0000000..b5167a5
--- /dev/null
+++ b/src/development/ld/link-mem.ld
@@ -0,0 +1,45 @@
+OUTPUT_FORMAT(elf64-x86-64)
+ENTRY(_entry)
+
+MEMORY {
+ ROM : ORIGIN = 0x00100000, LENGTH = 0x4000
+ RAM : ORIGIN = 0x00800000, LENGTH = 0x4000
+}
+
+SECTIONS {
+ /* Create .text output section at ROM (vaddr) */
+ .text : {
+ *(.text*)
+ } > ROM
+
+ ASSERT(. == ORIGIN(ROM) + SIZEOF(.text), "inc loc counter automatically")
+
+ /* Create .data output section at RAM (vaddr) */
+ /* Set load addr to ROM, right after .text (paddr) */
+ .data : {
+ HIDDEN(_data_vaddr = .);
+ HIDDEN(_data_paddr = LOADADDR(.data));
+ *(.data*)
+ } > RAM AT > ROM
+
+ /* Append .rodata output section at ROM (vaddr) */
+ .rodata : {
+ *(.rodata*)
+ } > ROM
+
+ /* Append .stack output section at RAM (vaddr) aligned up to next 0x1000 */
+ .stack : ALIGN (0x1000) {
+ . += 0x1000;
+ HIDDEN(_stack_top = .);
+ } > RAM
+
+ /DISCARD/ : {
+ *(.*)
+ }
+}
+
+/* Some example assertions */
+ASSERT(ADDR(.data) != LOADADDR(.data), "DATA vaddr and paddr must be different")
+ASSERT(ADDR(.rodata) == LOADADDR(.rodata), "RODATA vaddr and paddr must be euqal")
+ASSERT(ADDR(.stack) == ORIGIN(RAM) + 0x1000, "STACK section must aligned to 0x1000")
+ASSERT(SIZEOF(.stack) == 0x1000, "STACK section must be 0x1000")
diff --git a/src/development/ld/link-nomem.ld b/src/development/ld/link-nomem.ld
new file mode 100644
index 0000000..32b7f3c
--- /dev/null
+++ b/src/development/ld/link-nomem.ld
@@ -0,0 +1,43 @@
+OUTPUT_FORMAT(elf64-x86-64)
+ENTRY(_entry)
+
+SECTIONS {
+ /* Set the initial location counter (vaddr) */
+ . = 0x00800000;
+
+ /* Create .text output section at current vaddr */
+ .text : {
+ *(.text*)
+ }
+
+ ASSERT(. == 0x00800000 + SIZEOF(.text), "inc loc counter automatically")
+
+ /* Create .data section at location counter aligned to the next 0x100 (vaddr) */
+ /* Set the load address to 0x00100000 (paddr) */
+ .data ALIGN(0x100) : AT(0x00100000) {
+ HIDDEN(_data_vaddr = .);
+ HIDDEN(_data_paddr = LOADADDR(.data));
+ *(.data*)
+ }
+
+ /* Create .rodata with explicit vaddr */
+ /* Re-adjust the paddr location counter */
+ .rodata 0x00804000 : AT(ADDR(.rodata)) {
+ *(.rodata*)
+ }
+
+ ASSERT(. == 0x00804000 + SIZEOF(.rodata), "inc loc counter automatically")
+
+ .stack ALIGN (0x1000) : {
+ . += 0x1000;
+ HIDDEN(_stack_top = .);
+ }
+
+ /DISCARD/ : {
+ *(.*)
+ }
+}
+
+/* Some example assertions */
+ASSERT(ADDR(.data) != LOADADDR(.data), "DATA vaddr and paddr must be different")
+ASSERT(SIZEOF(.stack) == 0x1000, "STACK section must be 0x1000")