From 4a9214d09d6a526bd029a1f92a01a5f451313c9a Mon Sep 17 00:00:00 2001 From: johannst Date: Wed, 9 Apr 2025 22:28:27 +0000 Subject: deploy: 773d9b46ee3b1b88a94e69f42ea42654c63c48ec --- development/ld.html | 416 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 416 insertions(+) create mode 100644 development/ld.html (limited to 'development/ld.html') diff --git a/development/ld.html b/development/ld.html new file mode 100644 index 0000000..b1a3fbb --- /dev/null +++ b/development/ld.html @@ -0,0 +1,416 @@ + + + + + + ld - Notes + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+
+

ld(1)

+
ld [opts] files...
+    -T <script>        use <script> as linker script
+    --trace            report each file the linker touches
+    --start-group archives --end-group
+                       search archives repeatedly until no new
+                       undefined references are created
+                       (e.g. helpful with a list of static libraries)
+
+

Linker Script

+

Output sections are defined as follows (full description at output +section and input section).

+
section_name [vaddr] : [AT(paddr)] {
+    file_pattern (section_pattern)
+}
+
+

The following gives an example of an output section with two input section rules.

+
.foo : {
+    abc.o (.foo)
+    *.o (.foo.*)
+}
+
+

Example: virtual vs physical (load) address

+

Sometimes code is initially located at a different location than where it is run. +For example, in embedded systems code may initially reside in ROM and +startup code will copy a section with writable data into RAM. Code accessing +the writable data then accesses the data in RAM.

+

In this case we need different addresses for the same data.

+
    +
  • The virtual or runtime address, this is the address used when the linker +resolves accesses to the data. Hence, this is the address the data will have +when the code is running.
  • +
  • The physical or load address, this is the address the data is stored at +initially. Startup code typically copies the initial values from the +physical to the virtual address.
  • +
+

The following shows an example linker script which uses virtual and physical +addresses. The full source files can be found here.

+
OUTPUT_FORMAT(elf64-x86-64)
+ENTRY(_entry)
+
+SECTIONS {
+    /* Set the initial location counter (vaddr) */
+    . = 0x00800000;
+
+    /* Create .text output section at current vaddr */
+    .text : {
+        *(.text*)
+    }
+
+    ASSERT(. == 0x00800000 + SIZEOF(.text), "inc loc counter automatically")
+
+    /* Create .data section at location counter aligned to the next 0x100 (vaddr) */
+    /* Set the load address to  0x00100000 (paddr) */
+    .data ALIGN(0x100) : AT(0x00100000) {
+        HIDDEN(_data_vaddr = .);
+        HIDDEN(_data_paddr = LOADADDR(.data));
+        *(.data*)
+    }
+
+    /* Create .rodata with explicit vaddr */
+    /* Re-adjust the paddr location counter */
+    .rodata 0x00804000 : AT(ADDR(.rodata)) {
+        *(.rodata*)
+    }
+
+    ASSERT(. == 0x00804000 + SIZEOF(.rodata), "inc loc counter automatically")
+
+    .stack ALIGN (0x1000) : {
+        . += 0x1000;
+        HIDDEN(_stack_top = .);
+    }
+
+    /DISCARD/ : {
+        *(.*)
+    }
+}
+
+/* Some example assertions */
+ASSERT(ADDR(.data) != LOADADDR(.data), "DATA vaddr and paddr must be different")
+ASSERT(SIZEOF(.stack) == 0x1000, "STACK section must be 0x1000")
+
+

We can use the following assembly snippet to explore the linker script.

+
.section .text, "ax", @progbits
+.global _entry
+_entry:
+    mov $_stack_top, %rsp
+    mov $asm_array, %rax
+    mov (asm_len), %eax
+
+    hlt
+    jmp _entry
+
+.section .data.asm, "aw", @progbits
+asm_array:
+    .4byte 0xa
+    .4byte 0xb
+    .4byte 0xc
+    .4byte 0xd
+.rept 4
+    .4byte 0xff
+.endr
+
+.section .rodata.asm, "a", @progbits
+asm_len:
+    .4byte 8
+
+
+

gcc -c data.S && ld -o link-nomem -T link-nomem.ld data.o

+
+

The elf load segments show the difference in physical and virtual address +for the segment containing the .data section.

+
> readelf -W -l link-nomem
+# There are 4 program headers, starting at offset 64
+#
+# Program Headers:
+#   Type   Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
+#   LOAD   0x001100 0x0000000000800100 0x0000000000100000 0x000020 0x000020 RW  0x1000
+#   LOAD   0x002000 0x0000000000800000 0x0000000000800000 0x000018 0x000018 R E 0x1000
+#   LOAD   0x003000 0x0000000000804000 0x0000000000804000 0x000004 0x000004 R   0x1000
+#   LOAD   0x000000 0x0000000000805000 0x0000000000805000 0x000000 0x001000 RW  0x1000
+#
+#  Section to Segment mapping:
+#   Segment Sections...
+#   00     .data
+#   01     .text
+#   02     .rodata
+#   03     .stack
+
+

Startup code could copy data from _data_paddr to _data_vaddr.

+
> nm link-nomem
+# 0000000000800100 d asm_array
+# 0000000000804000 r asm_len
+# 0000000000100000 a _data_paddr
+# 0000000000800100 d _data_vaddr
+# 0000000000800000 T _entry
+# 0000000000806000 b _stack_top
+
+

The linker resolves symbols to their virtual address; this can be seen in the +access to the asm_array variable.

+
> objdump -d link-nomem
+# Disassembly of section .text:
+#
+# 0000000000800000 <_entry>:
+#   800000:	48 c7 c4 00 60 80 00 	mov    $0x806000,%rsp
+#   800007:	48 c7 c0 00 01 80 00 	mov    $0x800100,%rax   ;; mov $asm_array, %rax
+#   80000e:	8b 04 25 00 40 80 00 	mov    0x804000,%eax
+#   800015:	f4                   	hlt
+#   800016:	eb e8                	jmp    800000 <_entry>
+
+

The following linker script shows an example with the MEMORY command.

+
OUTPUT_FORMAT(elf64-x86-64)
+ENTRY(_entry)
+
+MEMORY {
+    ROM : ORIGIN = 0x00100000, LENGTH = 0x4000
+    RAM : ORIGIN = 0x00800000, LENGTH = 0x4000
+}
+
+SECTIONS {
+    /* Create .text output section at ROM (vaddr) */
+    .text : {
+        *(.text*)
+    } > ROM
+
+    ASSERT(. == ORIGIN(ROM) + SIZEOF(.text), "inc loc counter automatically")
+
+    /* Create .data output section at RAM (vaddr) */
+    /* Set load addr to ROM, right after .text (paddr) */
+    .data : {
+        HIDDEN(_data_vaddr = .);
+        HIDDEN(_data_paddr = LOADADDR(.data));
+        *(.data*)
+    } > RAM AT > ROM
+
+    /* Append .rodata output section at ROM (vaddr) */
+    .rodata : {
+        *(.rodata*)
+    } > ROM
+
+    /* Append .stack output section at RAM (vaddr) aligned up to next 0x1000 */
+    .stack : ALIGN (0x1000) {
+        . += 0x1000;
+        HIDDEN(_stack_top = .);
+    } > RAM
+
+    /DISCARD/ : {
+        *(.*)
+    }
+}
+
+/* Some example assertions */
+ASSERT(ADDR(.data) != LOADADDR(.data), "DATA vaddr and paddr must be different")
+ASSERT(ADDR(.rodata) == LOADADDR(.rodata), "RODATA vaddr and paddr must be equal")
+ASSERT(ADDR(.stack) == ORIGIN(RAM) + 0x1000, "STACK section must be aligned to 0x1000")
+ASSERT(SIZEOF(.stack) == 0x1000, "STACK section must be 0x1000")
+
+

References

+ + +
+ + +
+
+ + + +
+ + + + + + + + + + + + + + + + + + +
+ + -- cgit v1.2.3