#ifndef UTILS_ELF_PARSER_H
#define UTILS_ELF_PARSER_H
#include <memory>
#include <type_traits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <elf.h>
/// ENSURE(cond)
///
/// Evaluate COND exactly once. If it is false, print
/// "<file>:<line> '<cond>' failed" to stderr and abort() the process.
///
/// The file name and the condition text are passed as "%s" arguments rather
/// than pasted into the format string, so a '%' inside either of them (for
/// example ENSURE(a % b == 0)) is printed literally instead of being parsed as
/// a conversion specifier.
///
/// The do/while(0) wrapper makes the macro behave like a single statement, so
/// it can be used in an unbraced if/else.
#define ENSURE(cond)                                                     \
  do {                                                                   \
    if (!(cond)) {                                                       \
      std::fprintf(stderr, "%s:%d '%s' failed\n", __FILE__, __LINE__,    \
                   #cond);                                               \
      std::abort();                                                      \
    }                                                                    \
  } while (0)
// -- CURSOR -------------------------------------------------------------------
/// cursor
///
/// A simple cursor over a slice of bytes, allowing to extract trivial types
/// with added bound checks. The cursor only stores the pointer and length it
/// was given; it never copies or frees the underlying bytes. Every failed
/// check goes through ENSURE, i.e. it aborts the process.
class cursor {
 public:
  /// Create a cursor over LEN bytes starting at BYTES. BYTES must not be null.
  cursor(const unsigned char* bytes, size_t len) : m_bytes(bytes), m_len(len) {
    ENSURE(bytes != nullptr);
  }

  /// Move cursor to the offset OFF. OFF must lie inside the slice.
  void set_off(size_t off) {
    ENSURE(off < m_len);
    m_off = off;
  }

  /// Create a sub-slice from the cursor at offset OFF for LEN bytes.
  ///
  /// OFF is relative to the start of this slice (not to the current cursor
  /// position) and the whole range [OFF, OFF+LEN) must lie inside this slice.
  [[nodiscard]] cursor slice(size_t off, size_t len) const {
    ENSURE(off < m_len);
    // Written as a subtraction so that OFF + LEN cannot wrap around.
    ENSURE(len <= m_len - off);
    return {m_bytes + off, len};
  }

  /// Extract an instance of type T at the current cursor position, advance the
  /// cursor by sizeof(T) or INC, if INC!=0.
  ///
  /// The read itself is always bounds checked. The advance may legitimately
  /// move the cursor past the end of the slice (INC larger than what is left);
  /// in that case the next extract() aborts instead of reading out of bounds.
  template <typename T>
  [[nodiscard]] T extract(size_t inc = 0) {
    static_assert(std::is_trivially_constructible<T>::value,
                  "T trivial construct");
    static_assert(std::is_trivially_copyable<T>::value, "T trivial copy");
    // A previous call with a large INC may have left the cursor beyond the
    // end. Check that first, otherwise `m_len - m_off` below wraps around to a
    // huge value and the size check would pass on an out-of-range offset.
    ENSURE(m_off <= m_len);
    ENSURE(sizeof(T) <= m_len - m_off);
    T val;
    // memcpy instead of a cast: the bytes need not be aligned for T.
    std::memcpy(&val, m_bytes + m_off, sizeof(val));
    const size_t step = inc == 0 ? sizeof(val) : inc;
    // Reject a stride that would wrap the offset back into the valid range.
    ENSURE(step <= static_cast<size_t>(-1) - m_off);
    m_off += step;
    return val;
  }

  /// Get a raw pointer at offset OFF. OFF must lie inside the slice; the
  /// caller is responsible for not reading past the end of the slice.
  [[nodiscard]] const unsigned char* ptr(size_t off) const {
    ENSURE(off < m_len);
    return m_bytes + off;
  }

 private:
  const unsigned char* m_bytes;  // First byte of the slice.
  const size_t m_len;            // Number of bytes in the slice.
  size_t m_off{0};               // Current read position, relative to m_bytes.
};
// -- ELF INTERFACE ------------------------------------------------------------
/// Abstract handle to a parsed elf file.
///
/// Concrete instances are created by elf::parse and used only through this
/// interface.
struct elf {
  /// Create a parser for the elf image stored in the LEN bytes at BYTES.
  [[nodiscard]] static auto parse(const unsigned char* bytes, size_t len)
      -> std::unique_ptr<elf>;

  /// Virtual, so implementations can be destroyed through an elf pointer.
  virtual ~elf() = default;

  /// Visit the dynamic symbols of the file.
  ///
  /// HANDLE receives the name, type and bind of a symbol; its boolean result
  /// is handed back to the implementation.
  virtual void dynsyms(bool (*handle)(const char* name,
                                      char type,
                                      char bind)) = 0;
};
// -- ELF PARSER ---------------------------------------------------------------
namespace detail {
/// Type bundle for 32-bit elf files: maps the generic names used by the parser
/// onto the Elf32_* structures and splits a symbol's st_info byte.
struct elf32 {
  using sym = Elf32_Sym;
  using dyn = Elf32_Dyn;
  using shdr = Elf32_Shdr;
  using ehdr = Elf32_Ehdr;

  /// Symbol type part of ST_INFO, as computed by ELF32_ST_TYPE.
  static constexpr unsigned char st_type(unsigned char st_info) {
    return static_cast<unsigned char>(ELF32_ST_TYPE(st_info));
  }

  /// Symbol binding part of ST_INFO, as computed by ELF32_ST_BIND.
  static constexpr unsigned char st_bind(unsigned char st_info) {
    return static_cast<unsigned char>(ELF32_ST_BIND(st_info));
  }
};
/// Type bundle for 64-bit elf files: maps the generic names used by the parser
/// onto the Elf64_* structures and splits a symbol's st_info byte.
struct elf64 {
  using sym = Elf64_Sym;
  using dyn = Elf64_Dyn;
  using shdr = Elf64_Shdr;
  using ehdr = Elf64_Ehdr;

  /// Symbol type part of ST_INFO, as computed by ELF64_ST_TYPE.
  static constexpr unsigned char st_type(unsigned char st_info) {
    return static_cast<unsigned char>(ELF64_ST_TYPE(st_info));
  }

  /// Symbol binding part of ST_INFO, as computed by ELF64_ST_BIND.
  static constexpr unsigned char st_bind(unsigned char st_info) {
    return static_cast<unsigned char>(ELF64_ST_BIND(st_info));
  }
};
/// Elf file parser.
template <typename Elf>
class elf_parser : public elf {
using elf_ehdr = typename Elf::ehdr;
using elf_shdr = typename Elf::shdr;
using elf_dyn = typename Elf::dyn;
using elf_sym = typename Elf::sym;
public:
elf_parser(const unsigned char* bytes, size_t len) : m_bytes(bytes, len) {
// Extract elf header.
auto ehdr = m_bytes.extract<elf_ehdr>();
// Get section header associated with the string table for section names.
ENSURE(ehdr.e_shstrndx < ehdr.e_shnum);
m_bytes.set_off(ehdr.e_shoff + ehdr.e_shstrndx * ehdr.e_shentsize);
auto shdr_names = m_bytes.extract<elf_shdr>();
// Get slice for the section headers.
cursor shdr_bytes =
m_bytes.slice(ehdr.e_shoff, ehdr.e_shnum * ehdr.e_shentsize);
// Iterate section headers.
for (size_t i = 0; i < ehdr.e_shnum; ++i) {
// Extract section header.
auto shdr = shdr_bytes.extract<elf_shdr>(ehdr.e_shentsize);
switch (shdr.sh_type) {
case SHT_STRTAB:
if (std::memcmp(bytes + shdr_names.sh_offset + shdr.sh_name,
".dynstr\0", 8) == 0) {
ENSURE(m_dynstrtab == 0);
m_dynstrtab = shdr.sh_offset;
m_dynstrlen = shdr.sh_size;
}
break;
case SHT_DYNSYM:
ENSURE(m_dynsymtab == 0);
m_dynsymtab = shdr.sh_offset;
m_dynsyment = shdr.sh_entsize;
m_dynsymnum = shdr.sh_size / m_dynsyment;
}
}
ENSURE(m_dynstrtab > 0);
ENSURE(m_dynstrlen > 0);
ENSURE(m_dynsymtab > 0);
ENSURE(m_dynsyment > 0);
ENSURE(m_dynsymnum > 0);
}
void dynsyms(bool (*handle)(const char* name,
char type,
char bind)) override {
cursor sym_bytes = m_bytes.slice(m_dynsymtab, m_dynsymnum * m_dynsyment);
cursor str_bytes = m_bytes.slice(m_dynstrtab, m_dynstrlen);
for (size_t i = 0; i < m_dynsymnum; ++i) {
auto sym = sym_bytes.extract<elf_sym>(m_dynsyment);
const char* name =
reinterpret_cast<const char*>(str_bytes.ptr(sym.st_name));
if (!handle(name, Elf::st_type(sym.st_info), Elf::st_bind(sym.st_info))) {
return;
}
}
}
private:
cursor m_bytes;
size_t m_dynstrtab{0};
size_t m_dynstrlen{0};
size_t m_dynsymtab{0};
size_t m_dynsyment{0};
size_t m_dynsymnum{0};
};
} // namespace detail
// -- IMPL: ELF::PARSE ---------------------------------------------------------
inline std::unique_ptr<elf> elf::parse(const unsigned char* bytes, size_t len) {
ENSURE(EI_NIDENT <= len);
// Check elf file magic.
ENSURE(std::memcmp(bytes, ELFMAG, SELFMAG) == 0);
// Support only native endianess.
switch (bytes[EI_DATA]) {
case ELFDATA2LSB:
ENSURE(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__);
break;
case ELFDATA2MSB:
ENSURE(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__);
break;
default:
ENSURE(false);
}
// Parse with correct bitness.
switch (bytes[EI_CLASS]) {
case ELFCLASS32:
puts("parse elf32");
return std::make_unique<detail::elf_parser<detail::elf32>>(bytes, len);
case ELFCLASS64:
puts("parse elf64");
return std::make_unique<detail::elf_parser<detail::elf64>>(bytes, len);
default:
ENSURE(false);
}
}
#endif