aboutsummaryrefslogtreecommitdiff
path: root/02_process_init
diff options
context:
space:
mode:
Diffstat (limited to '02_process_init')
-rw-r--r--02_process_init/Makefile18
-rw-r--r--02_process_init/README.md294
-rw-r--r--02_process_init/entry.S27
-rw-r--r--02_process_init/entry.c75
4 files changed, 414 insertions, 0 deletions
diff --git a/02_process_init/Makefile b/02_process_init/Makefile
new file mode 100644
index 0000000..6e1f512
--- /dev/null
+++ b/02_process_init/Makefile
@@ -0,0 +1,18 @@
+# Copyright (c) 2020 Johannes Stoelp
+
+# `show` and `clean` do not create files of the same name. Declare them phony
+# so they always run, even if a file called `show` or `clean` exists.
+.PHONY: show clean
+
+# Build the example and run it with some sample command line arguments.
+show: entry
+	#gdb -q --batch -ex 'starti' -ex 'x/g $$rsp' -ex 'x/s *(char**)($$rsp+8)' ./$^
+	./entry 1 2 3 4
+
+# Link statically and without the default startup files and libraries, so the
+# `_start` symbol from entry.S is the one used as process entry point.
+entry: entry.S entry.c
+	gcc -o $@ \
+	 -I ../include \
+	 -Wall -Wextra \
+	 -static \
+	 -nostartfiles -nodefaultlibs \
+	 -g -O0 \
+	 $^
+
+
+clean:
+	rm -f entry
diff --git a/02_process_init/README.md b/02_process_init/README.md
new file mode 100644
index 0000000..5e4e7b3
--- /dev/null
+++ b/02_process_init/README.md
@@ -0,0 +1,294 @@
+# Process Initialization
+
+Before starting to implement a minimal dynamic linker the first step is to
+understand the `process initialization` in further depth.
+This is important because when starting a new process
+- the dynamic linker must set up the execution environment for the user program
+ (e.g. load dependencies, pass command line arguments)
+- the control is first passed to the dynamic linker (interpreter) by
+ the Linux Kernel as mentioned in
+ [01_dynamic_linking](../01_dynamic_linking/README.md)
+- the dynamic linker must be a stand-alone executable with no dependencies
+
+Before transferring control to a new user process the Linux Kernel provides some
+data on the `stack` with the format following the specification in the
+[`SystemV x86-64 ABI`][sysv_x86_64] chapter _Initial Stack and Register State_.
+
+## Stack state on process entry
+
+On process startup after `execve(2)` the stack looks as follows
+```text
+ +------------+ High Address
+ | .. |
+ | ENV strs |<-+
+ +->| ARG strs | |
+ | | .. | |
+ | +------------+ |
+ | | .. | |
+ | +------------+ |
+ | | AT_NULL | |
+ | +------------+ |
+ | | AUXV | |
+ | +------------+ |
+ | | 0x0 | |
+ | +------------+ |
+ | | ENVP |--+
+ | +------------+
+ | | 0x0 |
+ | +------------+
+ +--| ARGV |
+ +------------+
+ $rsp ->| ARGC |
+ +------------+ Low Address
+
+
+ | Offset (in bytes) | Type | Description
+-----+-----------------------+------------------------+--------------------
+AUXV | &ENVP + 8*#ENVP + 8 | struct { uint64_t[2] } | Auxiliary Vector
+ 0x0 | &ENVP + 8*#ENVP | | 0 terminator (ENVP)
+ENVP | &ARGV + 8*ARGC + 8 | const char* [] | Environment ptrs
+ 0x0 | &ARGV + 8*ARGC | | 0 terminator (ARGV)
+ARGV | $rsp + 8 | const char* [] | Argument ptrs
+ARGC | $rsp | uint64_t | Argument count
+```
+
+Where `ARGV` is an array of pointers to strings holding the command line
+arguments passed to the user program and `ARGC` is the number of arguments
+passed plus one, as `ARGV[0]` holds the path of the program started. Similarly,
+`ENVP` is an array of pointers to strings holding the environment variables as
+seen by this process.
+The `AUXV` is the auxiliary vector and holds additional information, for
+example the `entry point` or the `program header` of the program. Entries in
+`AUXV` are encoded as given in `AuxvEntry`.
+```c
+struct AuxvEntry {
+ uint64_t tag;
+ uint64_t val;
+};
+```
+The [`x86-64 System V ABI`][sysv_x86_64] chapter _Auxiliary Vector_ specifies
+the following tags
+```text
+AT_NULL = 0
+AT_IGNORE = 1
+AT_EXECFD = 2
+AT_PHDR = 3
+AT_PHENT = 4
+AT_PHNUM = 5
+AT_PAGESZ = 6
+AT_BASE = 7
+AT_FLAGS = 8
+AT_ENTRY = 9
+AT_NOTELF = 10
+AT_UID = 11
+AT_EUID = 12
+AT_GID = 13
+AT_EGID = 14
+```
+Where `AT_NULL` is used to indicate the end of `AUXV`.
+
+## Register state on process entry
+
+Regarding the state of general purpose registers on process entry the
+[`x86-64 SystemV ABI`][sysv_x86_64] states that all registers except the ones listed
+below are in an unspecified state:
+- `$rbp`: content is unspecified, but user code should set it to zero to mark
+ the deepest stack frame
+- `$rsp`: points to the beginning of the data block provided by the Kernel and
+ is guaranteed to be 16-byte aligned at process entry
+- `$rdx`: function pointer that the application should register with
+ `atexit(BA_OS)`.
+> Not sure here if clearing `$rbp` is strictly required as frame-pointer
+> chaining is optional and can be omitted (eg `gcc -fomit-frame-pointer`).
+
+## Hands-on the first instruction
+
+Before exploring and visualizing the data passed by the Linux Kernel on the
+stack there is one more question to answer:
+**How to run the first instruction in a process?**
+
+Typically when building a `C` program the user's entry point is the `main`
+function, however this won't contain the first instruction executed after the
+process entry. This can be seen by extracting the `entry point` from the ELF
+header and checking against the symbols in the program. Here the entry point is
+`0x1020` which belongs to the symbol `_start` and not `main`.
+```bash
+readelf -h main | grep Entry
+ Entry point address: 0x1020
+
+nm main | grep '1020\|main'
+ 0000000000001119 T main
+ 0000000000001020 T _start
+```
+
+This is because by default the `static linker` adds some extra code & libraries
+to the program like for example the `libc` and the `C-runtime (crt)` which
+contains the `_start` symbol and hence the first instruction executed.
+
+Passing `--trace` down to the `static linker` sheds some light on which
+input files the static linker actually processes.
+```bash
+echo 'void main() {}' | gcc -x c -o /dev/null - -Wl,--trace
+/usr/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../lib/Scrt1.o
+/usr/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../lib/crti.o
+/usr/lib/gcc/x86_64-pc-linux-gnu/10.2.0/crtbeginS.o
+/tmp/ccjZdjYx.o
+/usr/lib/gcc/x86_64-pc-linux-gnu/10.2.0/libgcc.a
+/usr/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../lib/libgcc_s.so
+/usr/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../lib/libc.so
+/usr/lib/ld-linux-x86-64.so.2
+/usr/lib/gcc/x86_64-pc-linux-gnu/10.2.0/crtendS.o
+/usr/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../lib/crtn.o
+```
+> `/tmp/ccjZdjYx.o` is a temporary file created by the compiler containing the
+> code echoed.
+
+The static linker can be explicitly told to not include any default files by
+using the `gcc -nostdlib` argument.
+```bash
+echo 'void _start() {}' | gcc -x c -o /dev/null - -Wl,--trace -nostdlib
+/tmp/ccbfkCoZ.o
+```
+Quoting `man gcc`
+> `-nostdlib` Do not use the standard system startup files or libraries when linking.
+
+## Examining the data from the Kernel
+
+With the capability to control the first instruction executed after process
+entry we finally can visualize the data passed by the Linux Kernel on the stack.
+
+First we provide the symbol `_start` (default entry point) which saves a
+pointer to the Kernel data in `$rdi` and jumps to a function called `entry`.
+The pointer is saved in `$rdi` because that's the register for the first
+argument of class `INTEGER` ([SystemV ABI Function Arguments][sysv_x86_64_fnarg]).
+```asm
+.section .text, "ax", @progbits
+.global _start
+_start:
+ // Clear $rbp.
+ xor rbp, rbp
+
+ // Load ptr to Kernel data.
+ lea rdi, [rsp]
+
+ call entry
+ ...
+```
+The full source code of the `_start` function is available in [entry.S](./entry.S).
+
+The pointer passed to the `entry` function can be used to compute `ARGC`,
+`ARGV` and `ENVP` accordingly.
+```c
+void entry(long* prctx) {
+ long argc = *prctx;
+ const char** argv = (const char**)(prctx + 1);
+ const char** envv = (const char**)(argv + argc + 1);
+ ...
+```
+
+To collect the `AUXV` entries we first need to count the number of environment
+variables as follows.
+```c
+// entry
+ ...
+ int envc = 0;
+ for (const char** env = envv; *env; ++env) {
+ ++envc;
+ }
+
+ uint64_t auxv[AT_MAX_CNT];
+ for (unsigned i = 0; i < AT_MAX_CNT; ++i) {
+ auxv[i] = 0;
+ }
+
+ const uint64_t* auxvp = (const uint64_t*)(envv + envc + 1);
+ for (unsigned i = 0; auxvp[i] != AT_NULL; i += 2) {
+ if (auxvp[i] < AT_MAX_CNT) {
+ auxv[auxvp[i]] = auxvp[i + 1];
+ }
+ }
+ ...
+```
+
+Finally the data can be printed as
+```c
+// entry
+ ...
+ dynld_printf("Got %d arg(s)\n", argc);
+ for (const char** arg = argv; *arg; ++arg) {
+ dynld_printf("\targ = %s\n", *arg);
+ }
+
+ const int max_env = 10;
+ dynld_printf("Print first %d env var(s)\n", max_env - 1);
+ for (const char** env = envv; *env && (env - envv < max_env); ++env) {
+ dynld_printf("\tenv = %s\n", *env);
+ }
+
+ dynld_printf("Print auxiliary vector\n");
+ dynld_printf("\tAT_EXECFD: %ld\n", auxv[AT_EXECFD]);
+ dynld_printf("\tAT_PHDR : %p\n", auxv[AT_PHDR]);
+ dynld_printf("\tAT_PHENT : %ld\n", auxv[AT_PHENT]);
+ dynld_printf("\tAT_PHNUM : %ld\n", auxv[AT_PHNUM]);
+ dynld_printf("\tAT_PAGESZ: %ld\n", auxv[AT_PAGESZ]);
+ dynld_printf("\tAT_BASE : %lx\n", auxv[AT_BASE]);
+ dynld_printf("\tAT_FLAGS : %ld\n", auxv[AT_FLAGS]);
+ dynld_printf("\tAT_ENTRY : %p\n", auxv[AT_ENTRY]);
+ dynld_printf("\tAT_NOTELF: %lx\n", auxv[AT_NOTELF]);
+ dynld_printf("\tAT_UID : %ld\n", auxv[AT_UID]);
+ dynld_printf("\tAT_EUID : %ld\n", auxv[AT_EUID]);
+ dynld_printf("\tAT_GID : %ld\n", auxv[AT_GID]);
+ dynld_printf("\tAT_EGID : %ld\n", auxv[AT_EGID]);
+ ...
+```
+The full source code of the `entry` function is available in [entry.c](./entry.c).
+
+Running the program as `./entry 1 2 3 4` yields the following output:
+```text
+Got 5 arg(s)
+ arg = ./entry
+ arg = 1
+ arg = 2
+ arg = 3
+ arg = 4
+Print first 9 env var(s)
+ env = I3SOCK=/run/user/1000/i3/ipc-socket.1200
+ env = LC_NAME=en_US.UTF-8
+ env = LC_NUMERIC=en_US.UTF-8
+ env = WINDOWID=46221701
+ env = LC_ADDRESS=en_US.UTF-8
+ env = GDM_LANG=en_US.utf8
+ env = PWD=/home/johannst/dev/dynld/02_process_init
+ env = MAIL=/var/spool/mail/johannst
+ env = XDG_SESSION_PATH=/org/freedesktop/DisplayManager/Session env = LANG=en_US.utf8
+Print auxiliary vector
+ AT_EXECFD: 0
+ AT_PHDR : 0x400040
+ AT_PHENT : 56
+ AT_PHNUM : 5
+ AT_PAGESZ: 4096
+ AT_BASE : 0
+ AT_FLAGS : 0
+ AT_ENTRY : 0x401000
+ AT_NOTELF: 0
+ AT_UID : 1000
+ AT_EUID : 1000
+ AT_GID : 1000
+ AT_EGID : 1000
+```
+
+## Things to remember
+- On process entry the Linux Kernel provides data on the stack as specified in
+ the `SystemV ABI`
+- By default the `static linker` adds additional code which contains the
+ `_start` symbol being the default process `entry point`
+
+## References & Source Code
+- [x86-64 SystemV ABI][sysv_x86_64]
+- [x86-64 SystemV ABI - Passing arguments to functions][sysv_x86_64_fnarg]
+- [entry.S](./entry.S)
+- [entry.c](./entry.c)
+
+[sysv_x86_64]: https://www.uclibc.org/docs/psABI-x86_64.pdf
+[sysv_x86_64_fnarg]: https://johannst.github.io/notes/arch/x86_64.html#passing-arguments-to-functions
diff --git a/02_process_init/entry.S b/02_process_init/entry.S
new file mode 100644
index 0000000..50425ba
--- /dev/null
+++ b/02_process_init/entry.S
@@ -0,0 +1,27 @@
+// Copyright (c) 2020 Johannes Stoelp
+
+#include <asm/unistd.h>
+
+.intel_syntax noprefix
+
+.section .text, "ax", @progbits
+.global _start
+// Process entry point, i.e. the first instruction executed after execve(2).
+// Passes a pointer to the data the kernel placed on the stack (ARGC, ARGV,
+// ENVP, AUXV) to the C function `entry` and terminates the process with
+// exit(0) once `entry` returns.
+_start:
+ // $rsp is guaranteed to be 16-byte aligned.
+
+ // Clear $rbp as specified by the SysV AMD64 ABI.
+ xor rbp, rbp
+
+ // Load pointer to process context prepared by execve(2) syscall as
+ // specified in the SysV AMD64 ABI.
+ // Save pointer in $rdi which is the arg0 (int/ptr) register.
+ lea rdi, [rsp]
+
+ // Stack frames must be 16-byte aligned before control is transferred to the
+ // callee's entry point. $rsp is still 16-byte aligned at this point, the
+ // `call` itself then pushes the 8-byte return address.
+ call entry
+
+ // Call exit(0) syscall.
+ // `entry` returns void, hence the exit status is hard-coded to 0.
+ mov rdi, 0
+ mov rax, __NR_exit
+ syscall
diff --git a/02_process_init/entry.c b/02_process_init/entry.c
new file mode 100644
index 0000000..a6b0918
--- /dev/null
+++ b/02_process_init/entry.c
@@ -0,0 +1,75 @@
+// Copyright (c) 2020 Johannes Stoelp
+
+#include <asm/unistd.h>
+#include <elf.h>
+#include <fmt.h>
+#include <stdint.h>
+#include <syscall.h>
+
+#if !defined(__linux__) || !defined(__x86_64__)
+# error "Only supported in linux(x86_64)!"
+#endif
+
+// Minimal printf replacement which formats into a fixed-size stack buffer and
+// writes the result to stdout using the raw write(2) syscall.
+//
+// At most `sizeof(buf) - 1` bytes are written per call, longer messages are
+// cut off.
+//
+// Returns the value reported by dynld_vsnprintf. Nothing is written if that
+// value is negative.
+int dynld_printf(const char* fmt, ...) {
+    char buf[64];
+
+    va_list ap;
+    va_start(ap, fmt);
+    int ret = dynld_vsnprintf(buf, sizeof(buf), fmt, ap);
+    va_end(ap);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    // Never pass a byte count to write(2) that exceeds the buffer content.
+    // This matters if dynld_vsnprintf (declared in fmt.h) reports the
+    // untruncated length like vsnprintf(3) does, in which case write(2) would
+    // read past the end of `buf`.
+    int len = ret;
+    if (len > (int)sizeof(buf) - 1) {
+        len = (int)sizeof(buf) - 1;
+    }
+
+    syscall3(__NR_write, 1 /* stdout */, buf, len);
+    return ret;
+}
+
+// C entry point called from `_start` with a pointer to the data the OS kernel
+// placed on the stack at process entry (x86_64 SysV ABI):
+//
+//   prctx -> ARGC
+//            ARGV[0] .. ARGV[ARGC-1], 0 terminator
+//            ENVP[0] .. ENVP[n-1], 0 terminator
+//            AUXV entries as (tag, value) pairs, terminated by AT_NULL
+//
+// Decodes ARGC, ARGV, ENVP and AUXV and prints them for demonstration.
+void entry(long* prctx) {
+    // Interpret data on the stack passed by the OS kernel as specified in the
+    // x86_64 SysV ABI.
+
+    long argc = *prctx;
+    const char** argv = (const char**)(prctx + 1);
+    // ENVP starts right behind the 0 terminator of ARGV.
+    const char** envv = (const char**)(argv + argc + 1);
+
+    // Count the environment pointers, required to locate the start of AUXV.
+    int envc = 0;
+    for (const char** env = envv; *env; ++env) {
+        ++envc;
+    }
+
+    // AUXV values indexed by their tag, tags not passed by the kernel stay 0.
+    uint64_t auxv[AT_MAX_CNT];
+    for (unsigned i = 0; i < AT_MAX_CNT; ++i) {
+        auxv[i] = 0;
+    }
+
+    // AUXV starts right behind the 0 terminator of ENVP and is a sequence of
+    // (tag, value) pairs. Tags outside of the local table are skipped.
+    const uint64_t* auxvp = (const uint64_t*)(envv + envc + 1);
+    for (unsigned i = 0; auxvp[i] != AT_NULL; i += 2) {
+        if (auxvp[i] < AT_MAX_CNT) {
+            auxv[auxvp[i]] = auxvp[i + 1];
+        }
+    }
+
+    // Print for demonstration
+
+    // `argc` is a long, use the matching length modifier.
+    dynld_printf("Got %ld arg(s)\n", argc);
+    for (const char** arg = argv; *arg; ++arg) {
+        dynld_printf("\targ = %s\n", *arg);
+    }
+
+    // The loop below prints up to `max_env` variables, so announce exactly
+    // that number.
+    const int max_env = 10;
+    dynld_printf("Print first %d env var(s)\n", max_env);
+    for (const char** env = envv; *env && (env - envv < max_env); ++env) {
+        dynld_printf("\tenv = %s\n", *env);
+    }
+
+    dynld_printf("Print auxiliary vector\n");
+    dynld_printf("\tAT_EXECFD: %ld\n", auxv[AT_EXECFD]);
+    dynld_printf("\tAT_PHDR  : %p\n", auxv[AT_PHDR]);
+    dynld_printf("\tAT_PHENT : %ld\n", auxv[AT_PHENT]);
+    dynld_printf("\tAT_PHNUM : %ld\n", auxv[AT_PHNUM]);
+    dynld_printf("\tAT_PAGESZ: %ld\n", auxv[AT_PAGESZ]);
+    dynld_printf("\tAT_BASE  : %lx\n", auxv[AT_BASE]);
+    dynld_printf("\tAT_FLAGS : %ld\n", auxv[AT_FLAGS]);
+    dynld_printf("\tAT_ENTRY : %p\n", auxv[AT_ENTRY]);
+    dynld_printf("\tAT_NOTELF: %lx\n", auxv[AT_NOTELF]);
+    dynld_printf("\tAT_UID   : %ld\n", auxv[AT_UID]);
+    dynld_printf("\tAT_EUID  : %ld\n", auxv[AT_EUID]);
+    dynld_printf("\tAT_GID   : %ld\n", auxv[AT_GID]);
+    dynld_printf("\tAT_EGID  : %ld\n", auxv[AT_EGID]);
+}