From ef6a411ce8ff615d65e2be105834c2fdbe557de1 Mon Sep 17 00:00:00 2001 From: johannst Date: Sat, 20 Mar 2021 02:34:01 +0100 Subject: Split common headers in header/src files. --- lib/Makefile | 25 +++++++++++ lib/include/auxv.h | 36 ++++++++++++++++ lib/include/elf.h | 75 ++++++++++++++++++++++++++++++++ lib/include/fmt.h | 8 ++++ lib/include/io.h | 5 +++ lib/include/syscall.h | 63 +++++++++++++++++++++++++++ lib/src/fmt.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/src/io.c | 37 ++++++++++++++++ 8 files changed, 366 insertions(+) create mode 100644 lib/Makefile create mode 100644 lib/include/auxv.h create mode 100644 lib/include/elf.h create mode 100644 lib/include/fmt.h create mode 100644 lib/include/io.h create mode 100644 lib/include/syscall.h create mode 100644 lib/src/fmt.c create mode 100644 lib/src/io.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile new file mode 100644 index 0000000..29c103a --- /dev/null +++ b/lib/Makefile @@ -0,0 +1,25 @@ +# Copyright (c) 2020 Johannes Stoelp + +HDR+=include/auxv.h +HDR+=include/elf.h +HDR+=include/fmt.h +HDR+=include/io.h +HDR+=include/syscall.h + +DEP+=src/io.o +DEP+=src/fmt.o + +libcommon.a: $(HDR) $(DEP) + ar -crs $@ $(filter %.o, $^) + +src/%.o: src/%.c + gcc -c -o $@ \ + -g -O0 \ + -Wall -Wextra \ + -I$(CURDIR)/include \ + -nostdlib \ + $< + +clean: + rm -f $(DEP) + rm -f libcommon.a diff --git a/lib/include/auxv.h b/lib/include/auxv.h new file mode 100644 index 0000000..42dac38 --- /dev/null +++ b/lib/include/auxv.h @@ -0,0 +1,36 @@ +// Copyright (c) 2020 Johannes Stoelp + +#pragma once + +#include + +/// ---------------- +/// Auxiliary Vector +/// ---------------- + +// NOTE: [x86-64] Either AT_EXECFD or AT_PHDR must be supplied by the Kernel. 
#define AT_NULL    0  /* [ignored] Marks end of auxiliary vector */
#define AT_IGNORE  1  /* [ignored] */
#define AT_EXECFD  2  /* [val] File descriptor of user program (in case the Linux Kernel didn't map it) */
#define AT_PHDR    3  /* [ptr] Address of Phdr of user program (in case the Kernel mapped the user program) */
#define AT_PHENT   4  /* [val] Size in bytes of one Phdr entry */
#define AT_PHNUM   5  /* [val] Number of Phdr entries */
#define AT_PAGESZ  6  /* [val] System page size */
#define AT_BASE    7  /* [ptr] `base address` interpreter was loaded to */
#define AT_FLAGS   8  /* [val] */
#define AT_ENTRY   9  /* [ptr] Entry point of user program */
#define AT_NOTELF  10 /* [val] >0 if not an ELF file */
#define AT_UID     11 /* [val] Real user id of process */
#define AT_EUID    12 /* [val] Effective user id of process */
#define AT_GID     13 /* [val] Real group id of process */
#define AT_EGID    14 /* [val] Effective group id of process */
#define AT_MAX_CNT 15

/// One entry of the auxiliary vector: an `AT_*` tag followed by a payload
/// that is read as a plain value or as a pointer, matching the [val] / [ptr]
/// marker next to the tag definition.
typedef struct {
    uint64_t tag;
    union {
        uint64_t val;
        void* ptr;
    };
} Auxv64Entry;

// The entry is read straight from memory, so its layout must not drift.
_Static_assert(sizeof(Auxv64Entry) == 16, "Auxv64Entry must be two 64-bit words");

// ---- lib/include/elf.h (new file mode 100644, index 0000000..a0fe6f7) ----
// Copyright (c) 2020 Johannes Stoelp

#pragma once

#include <stdint.h>

/// --------------
/// Program Header
/// --------------

#define PT_NULL    0 /* ignored */
#define PT_LOAD    1 /* Mark loadable segment (allowed p_memsz > p_filesz). */
#define PT_DYNAMIC 2 /* Location of .dynamic section */
#define PT_INTERP  3 /* Location of .interp section */
#define PT_NOTE    4 /* Location of auxiliary information */
#define PT_SHLIB   5 /* Reserved, but unspecified semantic */
#define PT_PHDR    6 /* Location & size of program headers itself */

#define PT_GNU_EH_FRAME 0x6474e550 /* [x86-64] stack unwinding tables */
#define PT_LOPROC       0x70000000
#define PT_HIPROC       0x7fffffff

#define PF_X 0x1 /* Phdr flag eXecute flag bitmask */
#define PF_W 0x2 /* Phdr flag Write flag bitmask */
#define PF_R 0x4 /* Phdr flag Read flag bitmask */

/// One 64-bit program header entry, describing a single segment.
typedef struct {
    uint32_t type;    // Segment kind.
    uint32_t flags;   // Flags describing Segment attributes like R, W, X.
    uint64_t offset;  // Offset into the file where the Segment starts.
    uint64_t vaddr;   // Virtual address of first byte of Segment in memory.
    uint64_t paddr;   // Physical address, ignored in our case.
    uint64_t filesz;  // Number of bytes of the Segment in the file image.
    uint64_t memsz;   // Number of bytes of the Segment in memory.
    uint64_t align;
} Elf64Phdr;

// Two 32-bit fields followed by six 64-bit fields, no padding expected.
_Static_assert(sizeof(Elf64Phdr) == 56, "Elf64Phdr must be 56 bytes");

/// ---------------
/// Dynamic Section
/// ---------------

#define DT_NULL     0  /* [ignored] Marks end of dynamic section */
#define DT_NEEDED   1  /* [val] Name of needed library */
#define DT_PLTRELSZ 2  /* [val] Size in bytes of PLT relocs */
#define DT_PLTGOT   3  /* [ptr] Processor defined value */
#define DT_HASH     4  /* [ptr] Address of symbol hash table */
#define DT_STRTAB   5  /* [ptr] Address of string table */
#define DT_SYMTAB   6  /* [ptr] Address of symbol table */
#define DT_RELA     7  /* [ptr] Address of Rela relocs */
#define DT_RELASZ   8  /* [val] Total size of Rela relocs */
#define DT_RELAENT  9  /* [val] Size of one Rela reloc */
#define DT_STRSZ    10 /* [val] Size of string table */
#define DT_SYMENT   11 /* [val] Size of one symbol table entry */
#define DT_INIT     12 /* [ptr] Address of init function */
#define DT_FINI     13 /* [ptr] Address of termination function */
#define DT_SONAME   14 /* [val] Name of shared object */
#define DT_RPATH    15 /* [val] Library search path (deprecated) */
#define DT_SYMBOLIC 16 /* [ignored] Start symbol search here */
#define DT_REL      17 /* [ptr] Address of Rel relocs */
#define DT_RELSZ    18 /* [val] Total size of Rel relocs */
#define DT_RELENT   19 /* [val] Size of one Rel reloc */
#define DT_PLTREL   20 /* [val] Type of reloc in PLT */
#define DT_DEBUG    21 /* [ptr] For debugging; unspecified */
#define DT_TEXTREL  22 /* [ignored] Reloc might modify .text */
#define DT_JMPREL   23 /* [ptr] Address of PLT relocs */
#define DT_BIND_NOW 24 /* [ignored] Process relocations of object */
#define DT_MAX_CNT  25

/// One entry of the dynamic section: a `DT_*` tag followed by a payload
/// that is read as a plain value or as a pointer, matching the [val] / [ptr]
/// marker next to the tag definition.
typedef struct {
    uint64_t tag;
    union {
        uint64_t val;
        void* ptr;
    };
} Elf64Dyn;

_Static_assert(sizeof(Elf64Dyn) == 16, "Elf64Dyn must be two 64-bit words");

// ---- lib/include/fmt.h (new file mode 100644, index 0000000..61215bc) ----
// Copyright (c) 2020 Johannes Stoelp

#pragma once

#include <stdarg.h>

/// Format `fmt` with the arguments in `ap` into `buf`.
///
/// At most `len` bytes are stored, including the terminating NUL. The return
/// value is the number of characters the complete output consists of (not
/// counting the NUL), so a result `>= len` tells the caller that the output
/// was truncated.
int vfmt(char* buf, unsigned long len, const char* fmt, va_list ap);

/// Variadic front end of `vfmt`; same buffer and return value semantics.
int fmt(char* buf, unsigned long len, const char* fmt, ...);

// ---- lib/include/io.h (new file mode 100644, index 0000000..5ca78a3) ----
// Copyright (c) 2020 Johannes Stoelp

#pragma once

/// Format a message and write it out with the `write` syscall.
/// Returns the number of message characters written.
int pfmt(const char* fmt, ...);

// ---- lib/include/syscall.h (new file mode 100644, index 0000000..4947155) ----
// Copyright (c) 2020 Johannes Stoelp

#pragma once

#if !defined(__linux__) || !defined(__x86_64__)
#    error "Only supported on linux(x86_64)!"
#endif

// Inline ASM
//   Syntax:
//     asm asm-qualifiers (AssemblerTemplate : OutputOperands : InputOperands : Clobbers)
//
//   Output operand constraints:
//     = | operand (variable) is written to by this instruction
//     + | operand (variable) is written to / read from by this instruction
//
//   Input/Output operand constraints:
//     r | allocate general purpose register
//
//   Machine specific constraints (x86_64):
//     a | a register (eg rax)
//     d | d register (eg rdx)
//     D | di register (eg rdi)
//     S | si register (eg rsi)
//
//   Local register variables:
//     In case a specific register is required which can not be specified via a
//     machine specific constraint.
//     ```c
//     register long r12 asm ("r12") = 42;
//     asm("nop" : : "r"(r12));
//     ```
//
//   Reference:
//     https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html
//     https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html#Machine-Constraints
//     https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html

// Linux syscall ABI
//   x86-64
//     #syscall: rax
//     ret     : rax
//     instr   : syscall
//     args    : rdi rsi rdx r10 r8 r9
//     clobber : rcx r11 (the `syscall` instruction stores the return address
//               in rcx and the saved flags in r11)
//
//   Reference:
//     syscall(2)

#define argcast(A) ((long)(A))
#define syscall1(n, a1) _syscall1(n, argcast(a1))
#define syscall3(n, a1, a2, a3) _syscall3(n, argcast(a1), argcast(a2), argcast(a3))

// Issue syscall number `n` with one argument and return the raw kernel result.
//
// `__asm__` is used instead of `asm` so the header also builds in strict ISO
// mode (`-std=c11`). rcx and r11 are listed as clobbers because the `syscall`
// instruction overwrites them; without that the compiler may keep live values
// in those registers across the call.
static inline long _syscall1(long n, long a1) {
    long ret;
    __asm__ __volatile__("syscall" : "=a"(ret) : "a"(n), "D"(a1) : "rcx", "r11", "memory");
    return ret;
}

// Issue syscall number `n` with three arguments and return the raw kernel
// result. See `_syscall1` for the clobber list.
static inline long _syscall3(long n, long a1, long a2, long a3) {
    long ret;
    __asm__ __volatile__("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2), "d"(a3) : "rcx", "r11", "memory");
    return ret;
}

// ---- lib/src/fmt.c (new file mode 100644, index 0000000..be1ca3a) ----
// Copyright (c) 2020 Johannes Stoelp

// NOTE(review): the target of the single `#include` of this file was lost
// during extraction; presumably it was the project header <fmt.h> -- confirm
// against the repository. <stdarg.h> supplies the `va_list` used below.
#include <stdarg.h>

// Scratch space for one converted number. Every byte of the value needs fewer
// than 3 decimal digits (2^8 < 10^3), so this holds any `unsigned long long`
// in base 10 or base 16 plus the terminating NUL.
#define FMT_SCRATCH_LEN (sizeof(unsigned long long) * 3 + 1)

// Render `num` in `base` (2..16) right-aligned into `buf[0..len)` and return a
// pointer to the first digit. The result is NUL terminated; if the buffer is
// too small the most significant digits are dropped.
static const char* num2str(char* buf, unsigned long len, unsigned long long num, unsigned base) {
    static const char digits[] = "0123456789abcdef";

    if (len == 0) {
        return "";
    }

    char* pbuf = buf + len - 1;
    *pbuf = '\0';

    // do/while so that 0 is rendered as "0" rather than as an empty string.
    do {
        if (pbuf == buf) {
            break;
        }
        *(--pbuf) = digits[num % base];
        num /= base;
    } while (num > 0);

    return pbuf;
}

// Decimal rendering of `num`, see `num2str`.
static const char* num2dec(char* buf, unsigned long len, unsigned long long num) {
    return num2str(buf, len, num, 10);
}

// Lower-case hexadecimal rendering of `num` (no "0x" prefix), see `num2str`.
static const char* num2hex(char* buf, unsigned long len, unsigned long long num) {
    return num2str(buf, len, num, 16);
}

// Output cursor of `vfmt`: counts every produced character but only stores
// the ones that fit into the destination buffer.
typedef struct {
    char* buf;
    unsigned long len;
    unsigned pos;
} FmtSink;

// Emit one character; characters beyond the buffer are counted, not stored.
static void fmt_putc(FmtSink* out, char c) {
    if (out->pos < out->len) {
        out->buf[out->pos] = c;
    }
    ++out->pos;
}

// Emit all characters of the NUL terminated string `s`.
static void fmt_puts(FmtSink* out, const char* s) {
    while (*s) {
        fmt_putc(out, *s++);
    }
}

// Format `fmt` with the arguments in `ap` into `buf`.
//
// Supported conversions: %d, %x, %s, %p; each `l` in front of d/x selects the
// `long` sized argument. Any other character after '%' is emitted verbatim
// (so "%%" yields '%'). A NULL argument for %s is rendered as "(null)". A
// format string that ends right after '%' (or "%l") stops the conversion
// there instead of reading past the terminator.
//
// buf: destination, may only be NULL when `len` is 0.
// len: capacity of `buf` in bytes, including the terminating NUL.
// Returns the number of characters of the complete output (excluding the
// NUL); a value >= `len` means the stored text was truncated.
int vfmt(char* buf, unsigned long len, const char* fmt, va_list ap) {
    FmtSink out = {buf, len, 0};
    char scratch[FMT_SCRATCH_LEN];

    while (*fmt) {
        if (*fmt != '%') {
            fmt_putc(&out, *fmt++);
            continue;
        }
        ++fmt;  // consume '%'

        int l_cnt = 0;
        while (*fmt == 'l') {
            ++l_cnt;
            ++fmt;
        }

        if (*fmt == '\0') {
            // Dangling conversion at the end of the format string.
            break;
        }

        switch (*fmt) {
            case 'd': {
                long val = l_cnt > 0 ? va_arg(ap, long) : va_arg(ap, int);
                // Negate in unsigned arithmetic, `-LONG_MIN` would overflow.
                unsigned long long mag = (unsigned long long)val;
                if (val < 0) {
                    mag = 0ULL - mag;
                    fmt_putc(&out, '-');
                }
                fmt_puts(&out, num2dec(scratch, sizeof(scratch), mag));
            } break;
            case 'x': {
                unsigned long val = l_cnt > 0 ? va_arg(ap, unsigned long) : va_arg(ap, unsigned);
                fmt_puts(&out, num2hex(scratch, sizeof(scratch), val));
            } break;
            case 's': {
                const char* str = va_arg(ap, const char*);
                fmt_puts(&out, str ? str : "(null)");
            } break;
            case 'p': {
                const void* val = va_arg(ap, const void*);
                fmt_puts(&out, "0x");
                fmt_puts(&out, num2hex(scratch, sizeof(scratch), (unsigned long long)val));
            } break;
            default:
                fmt_putc(&out, *fmt);
                break;
        }
        ++fmt;
    }

    // Always terminate when there is room for at least the NUL.
    if (buf && len > 0) {
        buf[out.pos < len ? out.pos : len - 1] = '\0';
    }
    return (int)out.pos;
}

// Variadic front end of `vfmt`; same buffer and return value semantics.
int fmt(char* buf, unsigned long len, const char* fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    int ret = vfmt(buf, len, fmt, ap);
    va_end(ap);
    return ret;
}

// ---- lib/src/io.c (new file mode 100644, index 0000000..efe938b) ----
// Copyright (c) 2020 Johannes Stoelp

// NOTE(review): this file carried four `#include` directives (a group of
// three, then one more) whose targets were lost during extraction.
// Presumably they were the project headers <io.h>, <fmt.h>, <syscall.h> and a
// system header providing `__NR_write` (e.g. <asm/unistd.h>) -- confirm
// against the repository and restore them.

// `pfmt` uses a fixed-size buffer on the stack for formatting the message
// (for simplicity and since we don't impl buffered I/O).
+// +// NOTE: This allows to specify a large buffer on the stack, but for +// the purpose of this study that's fine, we are cautious. +#define MAX_PRINTF_LEN 128 + +#define FD_STDOUT 1 +#define FD_STDERR 2 + +int pfmt(const char* fmt, ...) { + char buf[MAX_PRINTF_LEN]; + + va_list ap; + va_start(ap, fmt); + int ret = vfmt(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (ret > MAX_PRINTF_LEN - 1) { + syscall3(__NR_write, FD_STDERR, buf, MAX_PRINTF_LEN - 1); + + static const char warn[] = "\npfmt: Message truncated, max length can be configured by defining MAX_PRINTF_LEN\n"; + syscall3(__NR_write, FD_STDOUT, warn, sizeof(warn)); + return MAX_PRINTF_LEN - 1; + } + + syscall3(__NR_write, FD_STDOUT, buf, ret); + return ret; +} -- cgit v1.2.3