aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohannes Stoelp <johannes.stoelp@gmail.com>2025-01-27 01:20:19 +0100
committerJohannes Stoelp <johannes.stoelp@gmail.com>2025-01-27 01:33:00 +0100
commit2a26c1506192468be6c4cd06465bee861d87db51 (patch)
tree9ae5fc333e61c2478dfc57842daeacbf1dc78867
parent3f62112e3a1b180a9b931d6f43b3cdc74e7ba3b9 (diff)
downloadcpp-utils-main.tar.gz
cpp-utils-main.zip
elf: simple elf-parser for dynamic symbolsHEADmain
-rw-r--r--.clang-tidy2
-rw-r--r--.gitignore1
-rw-r--r--Makefile4
-rw-r--r--elf_parser.h228
-rw-r--r--test/elf_parser.cc59
-rw-r--r--test/elf_parser/Makefile12
-rw-r--r--test/elf_parser/test.cc7
7 files changed, 311 insertions, 2 deletions
diff --git a/.clang-tidy b/.clang-tidy
index 49c1de0..d3ff68b 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -54,7 +54,7 @@ CheckOptions:
- { key: readability-identifier-naming.EnumConstantPrefix, value: k }
- { key: readability-identifier-naming.ConstantCase, value: CamelCase }
- { key: readability-identifier-naming.ConstantPrefix, value: k }
- - { key: readability-identifier-naming.ConstantIgnoredRegexp, value: is_.*_v }
+ - { key: readability-identifier-naming.ConstantIgnoredRegexp, value: is_.*_v|m_.* }
- { key: readability-identifier-naming.FunctionCase, value: lower_case }
- { key: readability-identifier-naming.ParameterCase, value: lower_case }
diff --git a/.gitignore b/.gitignore
index 13ca83a..e3407a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ compile_commands.json
events.json
/build
.cache/
+test/elf_parser/libtest-*.h
diff --git a/Makefile b/Makefile
index 0cf7eb6..9c1941c 100644
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,7 @@ LDFLAGS = $(SANITIZER)
# -- RULES ---------------------------------------------------------------------
-default: lint build $(BINS)
+default: build lint $(BINS)
run: $(RUNS)
@@ -33,6 +33,7 @@ build/%.o: test/%.cc
build:
mkdir -p build
+ make -C test/elf_parser
lint:
clang-format --dry-run -Werror $(shell find -name '*.cc' -o -name '*.h')
@@ -41,6 +42,7 @@ lint:
clean:
$(RM) -r build
$(RM) compile_commands.json events.json
+ make -C test/elf_parser clean
# Since DEPS files contain rules, include at the end.
-include $(DEPS)
diff --git a/elf_parser.h b/elf_parser.h
new file mode 100644
index 0000000..0fbe726
--- /dev/null
+++ b/elf_parser.h
@@ -0,0 +1,228 @@
+#ifndef UTILS_ELF_PARSER_H
+#define UTILS_ELF_PARSER_H
+
+#include <memory>
+#include <type_traits>
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include <elf.h>
+
+/// ENSURE(cond)
+///
+/// Runtime check which is always active (independent of NDEBUG). If COND
+/// evaluates to false, "<file>:<line> '<cond>' failed" is printed to stderr and
+/// the process is aborted.
+///
+/// The file name and the text of the condition are passed as "%s" arguments
+/// rather than being pasted into the format string, so that a '%' in either
+/// of them (e.g. ENSURE(n % 2 == 0)) is printed verbatim instead of being
+/// interpreted as a printf conversion.
+#define ENSURE(cond)                                                     \
+  do {                                                                   \
+    if (!(cond)) {                                                       \
+      fprintf(stderr, "%s:%d '%s' failed\n", __FILE__, __LINE__, #cond); \
+      abort();                                                           \
+    }                                                                    \
+  } while (0)
+
+// -- CURSOR -------------------------------------------------------------------
+
+/// cursor
+///
+/// A simple, non-owning cursor over a slice of bytes, allowing to extract
+/// trivial types with added bound checks. A failed check aborts the process
+/// (see ENSURE).
+///
+/// The cursor only stores the pointer passed to the constructor, hence the
+/// underlying bytes must stay valid for as long as the cursor, or any
+/// sub-slice / raw pointer obtained from it, is in use.
+class cursor {
+ public:
+  /// Create a cursor over the LEN bytes starting at BYTES, positioned at
+  /// offset 0. BYTES must not be null.
+  cursor(const unsigned char* bytes, size_t len) : m_bytes(bytes), m_len(len) {
+    ENSURE(bytes != nullptr);
+  }
+
+  /// Move cursor to the offset OFF, which must be located inside the slice.
+  void set_off(size_t off) {
+    ENSURE(off < m_len);
+    m_off = off;
+  }
+
+  /// Create a sub-slice from the cursor at offset OFF for LEN bytes.
+  ///
+  /// OFF is relative to the start of this cursor (not to its current
+  /// position). The returned cursor is positioned at its own offset 0.
+  [[nodiscard]] cursor slice(size_t off, size_t len) const {
+    ENSURE(off < m_len);
+    ENSURE(len <= m_len - off);
+    return {m_bytes + off, len};
+  }
+
+  /// Extract an instance of type T at the current cursor position, advance the
+  /// cursor by sizeof(T) or INC, if INC!=0.
+  ///
+  /// Aborts if fewer than sizeof(T) bytes are left at the current position.
+  template <typename T>
+  [[nodiscard]] T extract(size_t inc = 0) {
+    static_assert(std::is_trivially_constructible<T>::value,
+                  "T trivial construct");
+    static_assert(std::is_trivially_copyable<T>::value, "T trivial copy");
+    // An earlier extract with an explicit INC may have moved the position past
+    // the end of the slice. Check this first, otherwise 'm_len - m_off' wraps
+    // around and the size check below would pass for an out-of-bounds read.
+    ENSURE(m_off <= m_len);
+    ENSURE(sizeof(T) <= m_len - m_off);
+
+    // Copy instead of casting the pointer, the bytes at the current position
+    // are not required to be suitably aligned for T.
+    T val;
+    std::memcpy(&val, m_bytes + m_off, sizeof(val));
+    m_off += inc == 0 ? sizeof(val) : inc;
+    return val;
+  }
+
+  /// Get a raw pointer at offset OFF (relative to the start of this cursor).
+  ///
+  /// Only OFF itself is bound checked, the caller is responsible for not
+  /// reading more than the bytes remaining in the slice.
+  [[nodiscard]] const unsigned char* ptr(size_t off) const {
+    ENSURE(off < m_len);
+    return m_bytes + off;
+  }
+
+ private:
+  const unsigned char* m_bytes;  // Start of the slice (not owned).
+  const size_t m_len;            // Length of the slice in bytes.
+  size_t m_off{0};               // Current position, relative to m_bytes.
+};
+
+// -- ELF INTERFACE ------------------------------------------------------------
+
+/// Interface to interact with parsed elf files.
+///
+/// Instances are created with elf::parse, which selects the parser matching
+/// the bitness of the given file.
+struct elf {
+  virtual ~elf() = default;
+
+  /// Iterate the dynamic symbols of the elf file.
+  ///
+  /// The callback is invoked once per symbol with the symbol NAME, TYPE and
+  /// BIND. Returning false from the callback stops the iteration.
+  virtual void dynsyms(bool (*)(const char* name, char type, char bind)) = 0;
+
+  /// Parse the elf file located in the LEN bytes starting at BYTES.
+  ///
+  /// The process is aborted if the bytes are not accepted as elf file. The
+  /// returned parser keeps referring to BYTES, they must outlive the parser.
+  [[nodiscard]] static std::unique_ptr<elf> parse(const unsigned char* bytes,
+                                                  size_t len);
+};
+
+// -- ELF PARSER ---------------------------------------------------------------
+
+namespace detail {
+/// Trait describing 32-bit elf types.
+///
+/// Maps the generic names used by the parser onto the Elf32_* structures and
+/// the ELF32_ST_* accessor macros from <elf.h>.
+struct elf32 {
+  using ehdr = Elf32_Ehdr;  // File header.
+  using shdr = Elf32_Shdr;  // Section header.
+  using dyn = Elf32_Dyn;    // Dynamic section entry.
+  using sym = Elf32_Sym;    // Symbol table entry.
+
+  /// Symbol type encoded in the ST_INFO field of a symbol.
+  static constexpr auto st_type(unsigned char st_info) -> unsigned char {
+    return ELF32_ST_TYPE(st_info);
+  }
+
+  /// Symbol binding encoded in the ST_INFO field of a symbol.
+  static constexpr auto st_bind(unsigned char st_info) -> unsigned char {
+    return ELF32_ST_BIND(st_info);
+  }
+};
+
+/// Trait describing 64-bit elf types.
+///
+/// Maps the generic names used by the parser onto the Elf64_* structures and
+/// the ELF64_ST_* accessor macros from <elf.h>.
+struct elf64 {
+  using ehdr = Elf64_Ehdr;  // File header.
+  using shdr = Elf64_Shdr;  // Section header.
+  using dyn = Elf64_Dyn;    // Dynamic section entry.
+  using sym = Elf64_Sym;    // Symbol table entry.
+
+  /// Symbol type encoded in the ST_INFO field of a symbol.
+  static constexpr auto st_type(unsigned char st_info) -> unsigned char {
+    return ELF64_ST_TYPE(st_info);
+  }
+
+  /// Symbol binding encoded in the ST_INFO field of a symbol.
+  static constexpr auto st_bind(unsigned char st_info) -> unsigned char {
+    return ELF64_ST_BIND(st_info);
+  }
+};
+
+/// Elf file parser.
+///
+/// Locates the dynamic symbol table (SHT_DYNSYM) and the associated string
+/// table (.dynstr) by walking the section headers, and allows to iterate the
+/// dynamic symbols. ELF is a trait (elf32 / elf64) selecting the structure
+/// layouts for the bitness of the file.
+///
+/// The parser does not copy the file, the bytes passed to the constructor
+/// must outlive the parser. Malformed input aborts the process (see ENSURE).
+template <typename Elf>
+class elf_parser final : public elf {
+  using elf_ehdr = typename Elf::ehdr;
+  using elf_shdr = typename Elf::shdr;
+  using elf_dyn = typename Elf::dyn;
+  using elf_sym = typename Elf::sym;
+
+ public:
+  /// Parse the section headers of the elf file in the LEN bytes at BYTES.
+  ///
+  /// Aborts if the file has no (or more than one) .dynstr / SHT_DYNSYM
+  /// section, or if any of the accessed structures is out of bounds.
+  elf_parser(const unsigned char* bytes, size_t len) : m_bytes(bytes, len) {
+    // Name of the dynamic string table, sizeof includes the terminating NUL.
+    static constexpr char kDynstr[] = ".dynstr";
+
+    // Extract elf header.
+    auto ehdr = m_bytes.extract<elf_ehdr>();
+
+    // The header fields are 16 bit wide and would be promoted to (signed) int
+    // when multiplied, which can overflow. Widen them to size_t up front.
+    const size_t shnum = ehdr.e_shnum;
+    const size_t shentsize = ehdr.e_shentsize;
+    const size_t shstrndx = ehdr.e_shstrndx;
+
+    // Get section header associated with the string table for section names.
+    ENSURE(shstrndx < shnum);
+    m_bytes.set_off(ehdr.e_shoff + shstrndx * shentsize);
+    auto shdr_names = m_bytes.extract<elf_shdr>();
+
+    // Get (bound checked) slice for the section name string table.
+    const size_t names_len = shdr_names.sh_size;
+    cursor names = m_bytes.slice(shdr_names.sh_offset, names_len);
+
+    // Get slice for the section headers.
+    cursor shdr_bytes = m_bytes.slice(ehdr.e_shoff, shnum * shentsize);
+
+    // Iterate section headers.
+    for (size_t i = 0; i < shnum; ++i) {
+      // Extract section header.
+      auto shdr = shdr_bytes.extract<elf_shdr>(shentsize);
+
+      switch (shdr.sh_type) {
+        case SHT_STRTAB: {
+          // Only compare the name if all compared bytes are located inside
+          // the section name table, a shorter name can not be .dynstr.
+          const size_t name_off = shdr.sh_name;
+          if (name_off < names_len &&
+              sizeof(kDynstr) <= names_len - name_off &&
+              std::memcmp(names.ptr(name_off), kDynstr, sizeof(kDynstr)) == 0) {
+            ENSURE(m_dynstrtab == 0);
+            m_dynstrtab = shdr.sh_offset;
+            m_dynstrlen = shdr.sh_size;
+          }
+          break;
+        }
+        case SHT_DYNSYM:
+          ENSURE(m_dynsymtab == 0);
+          // Check the entry size before it is used as divisor.
+          ENSURE(shdr.sh_entsize > 0);
+          m_dynsymtab = shdr.sh_offset;
+          m_dynsyment = shdr.sh_entsize;
+          m_dynsymnum = shdr.sh_size / m_dynsyment;
+          break;
+        default:
+          break;
+      }
+    }
+
+    ENSURE(m_dynstrtab > 0);
+    ENSURE(m_dynstrlen > 0);
+    ENSURE(m_dynsymtab > 0);
+    ENSURE(m_dynsyment > 0);
+    ENSURE(m_dynsymnum > 0);
+
+    // Symbol names are handed out as C strings pointing into .dynstr. Ensure
+    // the table is in bounds and ends with a NUL byte, such that every name
+    // starting inside the table is also terminated inside the table.
+    ENSURE(*m_bytes.slice(m_dynstrtab, m_dynstrlen).ptr(m_dynstrlen - 1) == 0);
+  }
+
+  /// Iterate the dynamic symbols in the order of the dynamic symbol table.
+  ///
+  /// HANDLE is invoked with the NAME, TYPE and BIND of each symbol, the
+  /// iteration stops once HANDLE returns false. NAME points into the bytes
+  /// passed to the constructor.
+  void dynsyms(bool (*handle)(const char* name,
+                              char type,
+                              char bind)) override {
+    ENSURE(handle != nullptr);
+
+    cursor sym_bytes = m_bytes.slice(m_dynsymtab, m_dynsymnum * m_dynsyment);
+    cursor str_bytes = m_bytes.slice(m_dynstrtab, m_dynstrlen);
+    for (size_t i = 0; i < m_dynsymnum; ++i) {
+      const auto sym = sym_bytes.extract<elf_sym>(m_dynsyment);
+      // ptr() checks that the name starts inside of .dynstr.
+      const char* name =
+          reinterpret_cast<const char*>(str_bytes.ptr(sym.st_name));
+      if (!handle(name, Elf::st_type(sym.st_info), Elf::st_bind(sym.st_info))) {
+        return;
+      }
+    }
+  }
+
+ private:
+  cursor m_bytes;  // The whole elf file (not owned).
+
+  size_t m_dynstrtab{0};  // File offset of .dynstr.
+  size_t m_dynstrlen{0};  // Size of .dynstr in bytes.
+
+  size_t m_dynsymtab{0};  // File offset of the dynamic symbol table.
+  size_t m_dynsyment{0};  // Size of one symbol table entry in bytes.
+  size_t m_dynsymnum{0};  // Number of symbol table entries.
+};
+} // namespace detail
+
+// -- IMPL: ELF::PARSE ---------------------------------------------------------
+
+/// Parse the elf file located in the LEN bytes starting at BYTES.
+///
+/// Checks the elf identification (magic, byte order, class) and creates the
+/// parser for the bitness of the file. Only files in the byte order of the
+/// host are supported. Any failed check aborts the process (see ENSURE).
+inline std::unique_ptr<elf> elf::parse(const unsigned char* bytes, size_t len) {
+  // Check the pointer before the identification bytes are read below.
+  ENSURE(bytes != nullptr);
+  ENSURE(EI_NIDENT <= len);
+
+  // Check elf file magic.
+  ENSURE(std::memcmp(bytes, ELFMAG, SELFMAG) == 0);
+
+  // Support only native endianness.
+  switch (bytes[EI_DATA]) {
+    case ELFDATA2LSB:
+      ENSURE(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__);
+      break;
+    case ELFDATA2MSB:
+      ENSURE(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__);
+      break;
+    default:
+      ENSURE(false);
+  }
+
+  // Parse with correct bitness.
+  switch (bytes[EI_CLASS]) {
+    case ELFCLASS32:
+      puts("parse elf32");
+      return std::make_unique<detail::elf_parser<detail::elf32>>(bytes, len);
+    case ELFCLASS64:
+      puts("parse elf64");
+      return std::make_unique<detail::elf_parser<detail::elf64>>(bytes, len);
+    default:
+      ENSURE(false);
+  }
+}
+
+#endif
diff --git a/test/elf_parser.cc b/test/elf_parser.cc
new file mode 100644
index 0000000..6f16639
--- /dev/null
+++ b/test/elf_parser.cc
@@ -0,0 +1,59 @@
+#include "elf_parser.h"
+
+#include <cstddef>
+#include <cstdio>
+
+#include <elf.h>
+
+#include "elf_parser/libtest-32.h"
+#include "elf_parser/libtest-64.h"
+
+/// Parse the elf file in the LEN bytes at BYTES and print one line with type,
+/// binding and name for each of its dynamic symbols to stdout.
+void dump_dynsyms(const unsigned char* bytes, size_t len) {
+  // Not named 'elf', which would shadow the type of the same name.
+  auto parsed = elf::parse(bytes, len);
+
+  parsed->dynsyms([](const char* name, char type, char bind) {  // NOLINT
+    // Bindings / types without a label below are printed as "<unknown>".
+    const char* bind_str = "<unknown>";
+    switch (bind) {
+      case STB_GLOBAL:
+        bind_str = "GLOBAL";
+        break;
+      case STB_LOCAL:
+        bind_str = "LOCAL";
+        break;
+      case STB_WEAK:
+        bind_str = "WEAK";
+        break;
+      default:
+        break;
+    }
+
+    const char* type_str = "<unknown>";
+    switch (type) {
+      case STT_NOTYPE:
+        type_str = "NOTYPE";
+        break;
+      case STT_FUNC:
+        type_str = "FUNC";
+        break;
+      case STT_OBJECT:
+        type_str = "OBJECT";
+        break;
+      case STT_SECTION:
+        type_str = "SECTION";
+        break;
+      case STT_FILE:
+        type_str = "FILE";
+        break;
+      default:
+        break;
+    }
+
+    printf("syms type: %10s bind: %10s name: %s\n", type_str, bind_str, name);
+    // Keep iterating, all symbols are dumped.
+    return true;
+  });
+}
+
+/// Dump the dynamic symbols of the embedded 32-bit and 64-bit test libraries.
+int main() {
+  // Byte arrays and lengths come from the generated libtest-*.h headers.
+  dump_dynsyms(libtest_32_so, libtest_32_so_len);
+  dump_dynsyms(libtest_64_so, libtest_64_so_len);
+  return 0;
+}
diff --git a/test/elf_parser/Makefile b/test/elf_parser/Makefile
new file mode 100644
index 0000000..ea726bd
--- /dev/null
+++ b/test/elf_parser/Makefile
@@ -0,0 +1,12 @@
+# Build headers embedding a 32-bit and a 64-bit build of the test library.
+
+# 'all' and 'clean' do not create files of that name, always run them.
+.PHONY: all clean
+
+all: libtest-32.h libtest-64.h
+
+# Embed the shared object as byte array, mark the generated 'unsigned ...'
+# declarations with NOLINTNEXTLINE and format the result.
+libtest-%.h: libtest-%.so
+	xxd -i $^ > $@
+	sed -i 's#^unsigned#// NOLINTNEXTLINE\nunsigned#' $@
+	clang-format -i $@
+
+# The stem (32 / 64) selects the bitness via -m32 / -m64.
+libtest-%.so: test.cc
+	$(CC) -m$* -shared -fPIC -o $@ $^
+
+clean:
+	$(RM) libtest-*.so libtest-*.h
diff --git a/test/elf_parser/test.cc b/test/elf_parser/test.cc
new file mode 100644
index 0000000..7cc71dd
--- /dev/null
+++ b/test/elf_parser/test.cc
@@ -0,0 +1,7 @@
+// Test symbol with C linkage, exported under its unmangled name.
+extern "C" {
+int exported_func_c(int x) {
+  return x + 1;
+}
+}
+
+// Test symbol with C++ linkage, exported under its mangled name.
+int exported_func_cpp(int x) {
+  const int successor = x + 1;
+  return successor;
+}