From 4e871c9dd0418c4f6d33c83fd3338ad261f7dd3f Mon Sep 17 00:00:00 2001 From: johannst Date: Sat, 20 Mar 2021 03:16:23 +0100 Subject: added chapter 03 hello dynld --- 03_hello_dynld/Makefile | 33 ++++++++++++ 03_hello_dynld/README.md | 127 +++++++++++++++++++++++++++++++++++++++++++++++ 03_hello_dynld/dynld.S | 31 ++++++++++++ 03_hello_dynld/dynld.c | 82 ++++++++++++++++++++++++++++++ 03_hello_dynld/main.c | 11 ++++ 5 files changed, 284 insertions(+) create mode 100644 03_hello_dynld/Makefile create mode 100644 03_hello_dynld/README.md create mode 100644 03_hello_dynld/dynld.S create mode 100644 03_hello_dynld/dynld.c create mode 100644 03_hello_dynld/main.c diff --git a/03_hello_dynld/Makefile b/03_hello_dynld/Makefile new file mode 100644 index 0000000..5764166 --- /dev/null +++ b/03_hello_dynld/Makefile @@ -0,0 +1,33 @@ +# Copyright (c) 2021 Johannes Stoelp + +COMMON_CFLAGS := -g -O0 -Wall -Wextra \ + -I../lib/include -nostdlib + +run: main + ./$< + +main: dynld.so main.c ../lib/libcommon.a + gcc -o $@ \ + $(COMMON_CFLAGS) \ + -Wl,--dynamic-linker=$(CURDIR)/dynld.so \ + $(filter %.c %.a, $^) + + readelf -W --dynamic $@ + readelf -W --string-dump .interp $@ + readelf -W --program-headers $@ + +dynld.so: dynld.S dynld.c ../lib/libcommon.a + gcc -o $@ \ + $(COMMON_CFLAGS) \ + -fPIC \ + -fvisibility=hidden \ + -Wl,--entry=dl_start \ + -Wl,--no-allow-shlib-undefined \ + $^ + +../lib/libcommon.a: + make -C ../lib + +clean: + rm -f main + rm -f dynld.so diff --git a/03_hello_dynld/README.md b/03_hello_dynld/README.md new file mode 100644 index 0000000..e79e13a --- /dev/null +++ b/03_hello_dynld/README.md @@ -0,0 +1,127 @@ +# Hello `dynld` + +### Goals +- Build dynamic linker `dynld.so` which retrieves the user program's + entrypoint (`AT_ENTRY`) from the auxiliary vector and transfers + control to it. +- Build `no-std` program with a custom `PT_INTERP` entry pointing to + `dynld.so`. 
+ +--- + +## Crafting the `dynld.so` + +As described in the `goals` above, the idea in this section is to +create a simple dynamic linker which just gets the `entrypoint` of the +user application and then jumps to it. This means the linker does not +support things like: +- Loading of additional dependencies. +- Performing relocations. + +That said, this dynamic linker will not be particularly useful but +will act as a skeleton for the upcoming chapters. + +The `entrypoint` of the user executable started by the dynamic linker +can be found in the `auxiliary vector` set up by the Linux Kernel (see +[02_process_init](../02_process_init/README.md)). +The entry of interest is the `AT_ENTRY`: +```text +AT_ENTRY + The `a_ptr` member of this entry holds the entry point of + the application program to which the interpreter program should + transfer control. +``` + +There are two additional entries that need to be discussed, +`AT_EXECFD` and `AT_PHDR`. The x86_64 SystemV ABI states that the OS +Kernel must provide one or the other in the `auxiliary vector`. +For simplicity the dynamic linker in this section only supports +`AT_PHDR`, which means it requires the OS Kernel to already memory map +the user executable. +```text +AT_EXECFD + At process creation the system may pass control to an + interpreter program. When this happens, the system places + either an entry of type `AT_EXECFD` or one of type `AT_PHDR` + in the auxiliary vector. The entry for type `AT_EXECFD` + contains a file descriptor open to read the application + program’s object file. + +AT_PHDR + The system may create the memory image of the application + program before passing control to the interpreter + program. When this happens the `AT_PHDR` entry tells the + interpreter where to find the program header table in the + memory image.
+``` + +Using the [`no-std` program](../02_process_init/entry.c) from chapter +[02_process_init](../02_process_init) as a starting point, loading and +jumping to the `entrypoint` of the user program can be done as: +```c +void (*user_entry)() = (void (*)())auxv[AT_ENTRY]; +user_entry(); +``` + +## User program + +The next step is to create the user program that will be loaded by +the dynamic linker created in the previous section. +For now the functionality of the user program is not important, but it +must fulfill the requirements not to depend on any shared libraries or +contain any relocations. +For this purpose the following simple `no-std` program is used: +```c +#include +#include + +#include + +void _start() { + pfmt("Running %s @ %s\n", __FUNCTION__, __FILE__); + + syscall1(__NR_exit, 0); +} +``` + +The important step, when linking the user program, is to inform the +static linker to add the `dynld.so` created above in the `.interp` +section. This can be done with the following command line switch: +```bash +gcc ... -Wl,--dynamic-linker=<path> ... +``` +> The full compile and link command can be seen in the [Makefile - main](./Makefile). + +The result can be seen in the `.interp` section referenced by the +`PT_INTERP` segment in the program headers: +```bash +readelf -W --string-dump .interp main + +String dump of section '.interp': + [ 0] /home/johannst/dev/dynld/03_hello_dynld/dynld.so +``` +```bash +readelf -W --program-headers main + +Program Headers: + Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align + PHDR 0x000040 0x0000000000000040 0x0000000000000040 0x0002d8 0x0002d8 R 0x8 + INTERP 0x000318 0x0000000000000318 0x0000000000000318 0x000031 0x000031 R 0x1 + [Requesting program interpreter: /home/johannst/dev/dynld/03_hello_dynld/dynld.so] + ... +``` + +As discussed in [01_dynamic_linking](../01_dynamic_linking/README.md) +the `PT_INTERP` segment tells the Linux Kernel which dynamic linker to +load to handle the startup of the user executable.
+ +When running the `./main` user program, the `dynld.so` will be loaded +by the Linux Kernel and control will be handed over to it. The +`dynld.so` will retrieve the `entrypoint` of the user program and then +jump to it. + +## Things to remember +- As defined by the SystemV ABI the OS Kernel must either provide an + entry for `AT_EXECFD` or `AT_PHDR` in the `auxiliary vector`. +- The `AT_ENTRY` points to the `entrypoint` of the user program. +- When linking with gcc, the dynamic linker can be specified via `-Wl,--dynamic-linker=<path>`. diff --git a/03_hello_dynld/dynld.S b/03_hello_dynld/dynld.S new file mode 100644 index 0000000..811fe2c --- /dev/null +++ b/03_hello_dynld/dynld.S @@ -0,0 +1,31 @@ +// Copyright (c) 2021 Johannes Stoelp + +#if !defined(__linux__) || !defined(__x86_64__) +# error "Only supported in linux(x86_64)!" +#endif + +#include + +.intel_syntax noprefix + +.section .text, "ax", @progbits +.global dl_start +dl_start: + // $rsp is guaranteed to be 16-byte aligned. + + // Clear $rbp as specified by the SysV AMD64 ABI. + xor rbp, rbp + + // Load pointer to process context prepared by execve(2) syscall as + // specified in the SysV AMD64 ABI. + // Save pointer in $rdi which is the arg0 (int/ptr) register. + lea rdi, [rsp] + + // Stack frames must be 16-byte aligned before control is transfered to the + // callees entry point. + call dl_entry + + // Call exit(1) syscall to indicate error, dl_entry should not return. + mov rdi, 1 + mov rax, __NR_exit + syscall diff --git a/03_hello_dynld/dynld.c b/03_hello_dynld/dynld.c new file mode 100644 index 0000000..1805b33 --- /dev/null +++ b/03_hello_dynld/dynld.c @@ -0,0 +1,82 @@ +// Copyright (c) 2021 Johannes Stoelp + +#include +#include +#include + +#include +#include + +#if !defined(__linux__) || !defined(__x86_64__) +# error "Only supported in linux(x86_64)!" +#endif + +void dl_entry(const uint64_t* prctx) { + // Interpret data on the stack passed by the OS kernel as specified in the + // x86_64 SysV ABI.
+ uint64_t argc = *prctx; + const char** argv = (const char**)(prctx + 1); + const char** envv = (const char**)(argv + argc + 1); + + // Count the number of environment variables in the `ENVP` segment. + int envc = 0; + for (const char** env = envv; *env; ++env) { + ++envc; + } + + uint64_t auxv[AT_MAX_CNT]; + for (unsigned i = 0; i < AT_MAX_CNT; ++i) { + auxv[i] = 0; + } + + // Read the `AUXV` auxiliary vector segment. + const Auxv64Entry* auxvp = (const Auxv64Entry*)(envv + envc + 1); + for (; auxvp->tag != AT_NULL; ++auxvp) { + if (auxvp->tag < AT_MAX_CNT) { + auxv[auxvp->tag] = auxvp->val; + } + } + + // Get address of the entrypoint for the user executable and + // transfer control. + // Requirements for the user executable: + // - no dependencies + // - no relocations + + pfmt("[dynld]: Running %s @ %s\n", __FUNCTION__, __FILE__); + + // Either `AT_EXECFD` or `AT_PHDR` must be specified, we only + // support `AT_PHDR` here. + // + // From the X86_64 SystemV ABI: + // AT_EXECFD + // At process creation the system may pass control to an + // interpreter program. When this happens, the system places + // either an entry of type `AT_EXECFD` or one of type `AT_PHDR` + // in the auxiliary vector. The entry for type `AT_EXECFD` + // contains a file descriptor open to read the application + // program’s object file. + // + // AT_PHDR + // The system may create the memory image of the application + // program before passing control to the interpreter + // program. When this happens the `AT_PHDR` entry tells the + // interpreter where to find the program header table in the + // memory image. + if (auxv[AT_PHDR] == 0 || auxv[AT_EXECFD] != 0) { + pfmt("[dynld]: ERROR, expected Linux Kernel to map user executable!\n"); + syscall1(__NR_exit, 1); + } + + if (auxv[AT_ENTRY] == 0) { + pfmt("[dynld]: ERROR, AT_ENTRY not found in auxiliary vector!\n"); + syscall1(__NR_exit, 1); + } + + // Transfer control to user executable. 
+ void (*user_entry)() = (void (*)())auxv[AT_ENTRY]; + pfmt("[dynld]: Got user entrypoint @0x%x\n", user_entry); + user_entry(); + + syscall1(__NR_exit, 0); +} diff --git a/03_hello_dynld/main.c b/03_hello_dynld/main.c new file mode 100644 index 0000000..acf1ff7 --- /dev/null +++ b/03_hello_dynld/main.c @@ -0,0 +1,11 @@ +// Copyright (c) 2021 Johannes Stoelp + +#include +#include + +#include + +void _start() { + pfmt("Running %s @ %s\n", __FUNCTION__, __FILE__); + syscall1(__NR_exit, 0); +} -- cgit v1.2.3