From 2a26c1506192468be6c4cd06465bee861d87db51 Mon Sep 17 00:00:00 2001
From: Johannes Stoelp
Date: Mon, 27 Jan 2025 01:20:19 +0100
Subject: elf: simple elf-parser for dynamic symbols

---
Review notes (not part of the commit message):
 - Makefile: recurse with $(MAKE) instead of a literal make.
 - cursor::extract: bound-check the step as well, so the offset can never
   move past the end of the slice.
 - elf_parser: widen e_shnum/e_shentsize before multiplying, read section
   names through a bound-checked slice, validate the entry sizes before they
   are used (sh_entsize == 0 divided by zero) and require a NUL terminated
   .dynstr table.
 - elf::parse: reject a null BYTES pointer before it is dereferenced.
 - The post-image blob ids on the index lines still refer to the original
   file contents.

 .clang-tidy              |   2 +-
 .gitignore               |   1 +
 Makefile                 |   4 +-
 elf_parser.h             | 265 +++++++++++++++++++++++++++++++++++++++++++++++
 test/elf_parser.cc       |  59 +++++++++++
 test/elf_parser/Makefile |  12 +++
 test/elf_parser/test.cc  |   7 ++
 7 files changed, 348 insertions(+), 2 deletions(-)
 create mode 100644 elf_parser.h
 create mode 100644 test/elf_parser.cc
 create mode 100644 test/elf_parser/Makefile
 create mode 100644 test/elf_parser/test.cc

diff --git a/.clang-tidy b/.clang-tidy
index 49c1de0..d3ff68b 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -54,7 +54,7 @@ CheckOptions:
   - { key: readability-identifier-naming.EnumConstantPrefix, value: k }
   - { key: readability-identifier-naming.ConstantCase, value: CamelCase }
   - { key: readability-identifier-naming.ConstantPrefix, value: k }
-  - { key: readability-identifier-naming.ConstantIgnoredRegexp, value: is_.*_v }
+  - { key: readability-identifier-naming.ConstantIgnoredRegexp, value: is_.*_v|m_.* }
   - { key: readability-identifier-naming.FunctionCase, value: lower_case }
   - { key: readability-identifier-naming.ParameterCase, value: lower_case }
 
diff --git a/.gitignore b/.gitignore
index 13ca83a..e3407a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ compile_commands.json
 events.json
 /build
 .cache/
+test/elf_parser/libtest-*.h
diff --git a/Makefile b/Makefile
index 0cf7eb6..9c1941c 100644
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,7 @@ LDFLAGS = $(SANITIZER)
 
 # -- RULES ---------------------------------------------------------------------
 
-default: lint build $(BINS)
+default: build lint $(BINS)
 
 run: $(RUNS)
 
@@ -33,6 +33,7 @@ build/%.o: test/%.cc
 
 build:
 	mkdir -p build
+	$(MAKE) -C test/elf_parser
 
 lint:
 	clang-format --dry-run -Werror $(shell find -name '*.cc' -o -name '*.h')
@@ -41,6 +42,7 @@ lint:
 clean:
 	$(RM) -r build
 	$(RM) compile_commands.json events.json
+	$(MAKE) -C test/elf_parser clean
 
 # Since DEPS files contain rules, include at the end.
 -include $(DEPS)
diff --git a/elf_parser.h b/elf_parser.h
new file mode 100644
index 0000000..0fbe726
--- /dev/null
+++ b/elf_parser.h
@@ -0,0 +1,265 @@
+#ifndef UTILS_ELF_PARSER_H
+#define UTILS_ELF_PARSER_H
+
+#include <cstdio>
+#include <cstdlib>
+
+#include <cstring>
+#include <memory>
+#include <type_traits>
+
+#include <elf.h>
+
+#define ENSURE(cond)                                                  \
+  do {                                                                \
+    if (!(cond)) {                                                    \
+      fprintf(stderr, __FILE__ ":%d '" #cond "' failed\n", __LINE__); \
+      abort();                                                        \
+    }                                                                 \
+  } while (0)
+
+// -- CURSOR -------------------------------------------------------------------
+
+/// cursor
+///
+/// A simple cursor over a slice of bytes, allowing to extract trivial types
+/// with added bound checks.
+class cursor {
+ public:
+  cursor(const unsigned char* bytes, size_t len) : m_bytes(bytes), m_len(len) {
+    ENSURE(bytes != nullptr);
+  }
+
+  /// Move cursor to the offset OFF.
+  void set_off(size_t off) {
+    ENSURE(off < m_len);
+    m_off = off;
+  }
+
+  /// Create a sub-slice from the cursor at offset OFF for LEN bytes.
+  [[nodiscard]] cursor slice(size_t off, size_t len) const {
+    ENSURE(off < m_len);
+    ENSURE(len <= m_len - off);
+    return {m_bytes + off, len};
+  }
+
+  /// Extract an instance of type T at the current cursor position, advance the
+  /// cursor by sizeof(T) or INC, if INC!=0.
+  ///
+  /// Both the bytes read and the step taken must lie within the slice. This
+  /// keeps m_off <= m_len, so the unsigned subtractions below cannot wrap
+  /// around and defeat the bound check of a later call.
+  template <typename T>
+  [[nodiscard]] T extract(size_t inc = 0) {
+    static_assert(std::is_trivially_constructible<T>::value,
+                  "T trivial construct");
+    static_assert(std::is_trivially_copyable<T>::value, "T trivial copy");
+    const size_t step = inc == 0 ? sizeof(T) : inc;
+    ENSURE(sizeof(T) <= m_len - m_off);
+    ENSURE(step <= m_len - m_off);
+
+    T val;
+    std::memcpy(&val, m_bytes + m_off, sizeof(val));
+    m_off += step;
+    return val;
+  }
+
+  /// Get a raw pointer at offset OFF.
+  [[nodiscard]] const unsigned char* ptr(size_t off) const {
+    ENSURE(off < m_len);
+    return m_bytes + off;
+  }
+
+ private:
+  const unsigned char* m_bytes;
+  const size_t m_len;
+  size_t m_off{0};
+};
+
+// -- ELF INTERFACE ------------------------------------------------------------
+
+/// Interface to interact with parsed elf files.
+struct elf {
+  /// Parse the elf file held in the LEN bytes at BYTES. Aborts via ENSURE if
+  /// the input fails one of the parser's checks.
+  [[nodiscard]] static std::unique_ptr<elf> parse(const unsigned char* bytes,
+                                                  size_t len);
+  virtual ~elf() = default;
+
+  /// Invoke the callback for each dynamic symbol until it returns false.
+  virtual void dynsyms(bool (*)(const char* name, char type, char bind)) = 0;
+};
+
+// -- ELF PARSER ---------------------------------------------------------------
+
+namespace detail {
+/// Trait describing 32-bit elf types.
+struct elf32 {
+  using ehdr = Elf32_Ehdr;
+  using shdr = Elf32_Shdr;
+  using dyn = Elf32_Dyn;
+  using sym = Elf32_Sym;
+
+  static constexpr unsigned char st_type(unsigned char st_info) {
+    return ELF32_ST_TYPE(st_info);
+  }
+  static constexpr unsigned char st_bind(unsigned char st_info) {
+    return ELF32_ST_BIND(st_info);
+  }
+};
+
+/// Trait describing 64-bit elf types.
+struct elf64 {
+  using ehdr = Elf64_Ehdr;
+  using shdr = Elf64_Shdr;
+  using dyn = Elf64_Dyn;
+  using sym = Elf64_Sym;
+
+  static constexpr unsigned char st_type(unsigned char st_info) {
+    return ELF64_ST_TYPE(st_info);
+  }
+  static constexpr unsigned char st_bind(unsigned char st_info) {
+    return ELF64_ST_BIND(st_info);
+  }
+};
+
+/// Elf file parser.
+template <typename Elf>
+class elf_parser : public elf {
+  using elf_ehdr = typename Elf::ehdr;
+  using elf_shdr = typename Elf::shdr;
+  using elf_dyn = typename Elf::dyn;
+  using elf_sym = typename Elf::sym;
+
+ public:
+  elf_parser(const unsigned char* bytes, size_t len) : m_bytes(bytes, len) {
+    // Extract elf header.
+    const auto ehdr = m_bytes.extract<elf_ehdr>();
+
+    // Widen the 16-bit header fields before multiplying; left as is they are
+    // promoted to int and the product can overflow.
+    const size_t shnum = ehdr.e_shnum;
+    const size_t shentsize = ehdr.e_shentsize;
+    ENSURE(sizeof(elf_shdr) <= shentsize);
+
+    // Get slice for the section headers.
+    cursor shdr_bytes = m_bytes.slice(ehdr.e_shoff, shnum * shentsize);
+
+    // Get section header associated with the string table for section names.
+    ENSURE(ehdr.e_shstrndx < shnum);
+    cursor name_hdr = shdr_bytes.slice(ehdr.e_shstrndx * shentsize, shentsize);
+    const auto shdr_names = name_hdr.extract<elf_shdr>();
+
+    // Get slice for the section names, names are only read through it.
+    const size_t names_len = shdr_names.sh_size;
+    const cursor names = m_bytes.slice(shdr_names.sh_offset, names_len);
+
+    // Iterate section headers.
+    for (size_t i = 0; i < shnum; ++i) {
+      // Extract section header.
+      const auto shdr = shdr_bytes.extract<elf_shdr>(shentsize);
+
+      switch (shdr.sh_type) {
+        case SHT_STRTAB:
+          if (is_dynstr(names, names_len, shdr.sh_name)) {
+            ENSURE(m_dynstrtab == 0);
+            m_dynstrtab = shdr.sh_offset;
+            m_dynstrlen = shdr.sh_size;
+          }
+          break;
+        case SHT_DYNSYM:
+          ENSURE(m_dynsymtab == 0);
+          // Guards the division below and rejects overlapping symbols.
+          ENSURE(sizeof(elf_sym) <= shdr.sh_entsize);
+          m_dynsymtab = shdr.sh_offset;
+          m_dynsyment = shdr.sh_entsize;
+          m_dynsymnum = shdr.sh_size / m_dynsyment;
+          break;
+        default:
+          break;
+      }
+    }
+
+    ENSURE(m_dynstrtab > 0);
+    ENSURE(m_dynstrlen > 0);
+    ENSURE(m_dynsymtab > 0);
+    ENSURE(m_dynsyment > 0);
+    ENSURE(m_dynsymnum > 0);
+  }
+
+  void dynsyms(bool (*handle)(const char* name,
+                              char type,
+                              char bind)) override {
+    cursor sym_bytes = m_bytes.slice(m_dynsymtab, m_dynsymnum * m_dynsyment);
+    const cursor str_bytes = m_bytes.slice(m_dynstrtab, m_dynstrlen);
+    // Names are handed out as C strings, require the table to end in a NUL so
+    // that no name can run past the end of the slice.
+    ENSURE(*str_bytes.ptr(m_dynstrlen - 1) == '\0');
+
+    for (size_t i = 0; i < m_dynsymnum; ++i) {
+      const auto sym = sym_bytes.extract<elf_sym>(m_dynsyment);
+      const char* name =
+          reinterpret_cast<const char*>(str_bytes.ptr(sym.st_name));
+      if (!handle(name, Elf::st_type(sym.st_info), Elf::st_bind(sym.st_info))) {
+        return;
+      }
+    }
+  }
+
+ private:
+  /// Check if the name at offset OFF in the section name table NAMES, which is
+  /// NAMES_LEN bytes long, is ".dynstr".
+  static bool is_dynstr(const cursor& names, size_t names_len, size_t off) {
+    constexpr char kDynstr[] = ".dynstr";  // sizeof includes the NUL.
+    if (off >= names_len || sizeof(kDynstr) > names_len - off) {
+      return false;
+    }
+    return std::memcmp(names.ptr(off), kDynstr, sizeof(kDynstr)) == 0;
+  }
+
+  cursor m_bytes;
+
+  size_t m_dynstrtab{0};
+  size_t m_dynstrlen{0};
+
+  size_t m_dynsymtab{0};
+  size_t m_dynsyment{0};
+  size_t m_dynsymnum{0};
+};
+}  // namespace detail
+
+// -- IMPL: ELF::PARSE ---------------------------------------------------------
+
+inline std::unique_ptr<elf> elf::parse(const unsigned char* bytes, size_t len) {
+  ENSURE(bytes != nullptr);
+  ENSURE(EI_NIDENT <= len);
+
+  // Check elf file magic.
+  ENSURE(std::memcmp(bytes, ELFMAG, SELFMAG) == 0);
+
+  // Support only native endianness.
+  switch (bytes[EI_DATA]) {
+    case ELFDATA2LSB:
+      ENSURE(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__);
+      break;
+    case ELFDATA2MSB:
+      ENSURE(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__);
+      break;
+    default:
+      ENSURE(false);
+  }
+
+  // Parse with correct bitness.
+  switch (bytes[EI_CLASS]) {
+    case ELFCLASS32:
+      puts("parse elf32");
+      return std::make_unique<detail::elf_parser<detail::elf32>>(bytes, len);
+    case ELFCLASS64:
+      puts("parse elf64");
+      return std::make_unique<detail::elf_parser<detail::elf64>>(bytes, len);
+    default:
+      ENSURE(false);
+  }
+}
+
+#endif
diff --git a/test/elf_parser.cc b/test/elf_parser.cc
new file mode 100644
index 0000000..6f16639
--- /dev/null
+++ b/test/elf_parser.cc
@@ -0,0 +1,59 @@
+#include "elf_parser.h"
+
+#include <cstddef>
+#include <cstdio>
+
+#include <elf.h>
+
+#include "elf_parser/libtest-32.h"
+#include "elf_parser/libtest-64.h"
+
+void dump_dynsyms(const unsigned char* bytes, size_t len) {
+  auto elf = elf::parse(bytes, len);
+
+  elf->dynsyms([](const char* name, char type, char bind) {  // NOLINT
+    const char* bind_str = "";
+    switch (bind) {
+      case STB_GLOBAL:
+        bind_str = "GLOBAL";
+        break;
+      case STB_LOCAL:
+        bind_str = "LOCAL";
+        break;
+      case STB_WEAK:
+        bind_str = "WEAK";
+        break;
+      default:
+        break;
+    }
+
+    const char* type_str = "";
+    switch (type) {
+      case STT_NOTYPE:
+        type_str = "NOTYPE";
+        break;
+      case STT_FUNC:
+        type_str = "FUNC";
+        break;
+      case STT_OBJECT:
+        type_str = "OBJECT";
+        break;
+      case STT_SECTION:
+        type_str = "SECTION";
+        break;
+      case STT_FILE:
+        type_str = "FILE";
+        break;
+      default:
+        break;
+    }
+
+    printf("syms type: %10s bind: %10s name: %s\n", type_str, bind_str, name);
+    return true;
+  });
+}
+
+int main() {
+  dump_dynsyms(libtest_32_so, libtest_32_so_len);
+  dump_dynsyms(libtest_64_so, libtest_64_so_len);
+}
diff --git a/test/elf_parser/Makefile b/test/elf_parser/Makefile
new file mode 100644
index 0000000..ea726bd
--- /dev/null
+++ b/test/elf_parser/Makefile
@@ -0,0 +1,12 @@
+all: libtest-32.h libtest-64.h
+
+libtest-%.h: libtest-%.so
+	xxd -i $^ > $@
+	sed -i 's#^unsigned#// NOLINTNEXTLINE\nunsigned#' $@
+	clang-format -i $@
+
+libtest-%.so: test.cc
+	$(CC) -m$* -shared -fPIC -o $@ $^
+
+clean:
+	$(RM) libtest-*.so libtest-*.h
diff --git a/test/elf_parser/test.cc b/test/elf_parser/test.cc
new file mode 100644
index 0000000..7cc71dd
--- /dev/null
+++ b/test/elf_parser/test.cc
@@ -0,0 +1,7 @@
+extern "C" int exported_func_c(int x) {
+  return x + 1;
+}
+
+int exported_func_cpp(int x) {
+  return x + 1;
+}
-- 
cgit v1.2.3