From 2b0d09a2c446765b8d952b648596585c456107ba Mon Sep 17 00:00:00 2001 From: Johannes Stoelp Date: Mon, 30 Jan 2023 22:29:40 +0100 Subject: move from const generic to iterator approach This implies that we parse the phdrs every time we iterate over them; however, it also gives the flexibility of not providing an upper bound of supported load segments during compile time. Trading flexibility vs potential repetitive work is totally fine, as in the use cases where this crate is used, the phdrs are usually iterated once or twice. --- src/lib.rs | 431 +++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 276 insertions(+), 155 deletions(-) (limited to 'src/lib.rs') diff --git a/src/lib.rs b/src/lib.rs index 9d79e73..313a930 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,10 +6,6 @@ pub enum Error { WrongElfMagic, /// No more bytes left while parsing the ELF file. OutOfBytes, - /// Const generic on `Elf` is too small to hold all `LOAD` from the ELF file. - OutOfLoadSegments, - /// Failed to convert between data types internally. - TypeConversion(&'static str), /// Unknown value in `e_ident:EI_CLASS` byte. UnknownBitness(u8), /// Unknown value in `e_ident:EI_DATA` byte. @@ -20,12 +16,14 @@ pub enum Error { type Result = core::result::Result; +/// Helper trait to define trait bounds providing endian aware construction methods. trait FromEndian: Sized { const N: usize = core::mem::size_of::(); fn from_le_bytes>(bytes: B) -> Option; fn from_be_bytes>(bytes: B) -> Option; } +/// Helper macro to easily implement [`FromEndian`] trait for basic types. macro_rules! impl_from_endian { ($ty: ty) => { impl FromEndian for $ty { @@ -55,6 +53,72 @@ impl_from_endian!(u16); impl_from_endian!(u32); impl_from_endian!(u64); +/// Helper to safely construct generic types from a stream of bytes. +struct ByteReader<'bytes> { + bytes: &'bytes [u8], + pos: usize, +} + +impl<'bytes> ByteReader<'bytes> { + /// Construct a new [`ByteReader`] instance from a slice of bytes. 
+ const fn new(bytes: &'bytes [u8]) -> ByteReader<'_> { + ByteReader { bytes, pos: 0 } + } + + /// Safely extract a slice of bytes with the given length `len`. + fn read_slice(&mut self, len: usize) -> Result<&'bytes [u8]> { + if let Some(bytes) = self.bytes.get(self.pos..self.pos + len) { + self.bump(len); + Ok(bytes) + } else { + Err(Error::OutOfBytes) + } + } + + /// Safely extract an `E` with the endianness given by [`en`][Endian]. + fn read(&mut self, en: Endian) -> Result { + let bytes = self.bytes.get(self.pos..).ok_or(Error::OutOfBytes)?; + + let val = match en { + Endian::Little => E::from_le_bytes(&bytes), + Endian::Big => E::from_be_bytes(&bytes), + }; + + if val.is_some() { + self.bump(E::N); + } + + val.ok_or(Error::OutOfBytes) + } + + /// Safely extract a value of size [`bit`][Bit] with the endianness given by [`en`][Endian]. + fn read_native(&mut self, en: Endian, bit: Bit) -> Result { + match bit { + Bit::Bit32 => self.read::(en).map(u64::from), + Bit::Bit64 => self.read::(en), + } + } + + /// Increment the current position of the [`ByteReader`] by `inc`. + #[inline] + fn bump(&mut self, inc: usize) { + self.pos += inc; + } + + /// Set the current position of the [`ByteReader`] to `pos`. + #[inline] + fn set_pos(&mut self, pos: usize) { + self.pos = pos; + } + + /// Get the current position of the [`ByteReader`]. + #[inline] + const fn pos(&self) -> usize { + self.pos + } +} + +/// Possible ELF endian variants. #[derive(Debug, Clone, Copy)] enum Endian { Little, @@ -73,6 +137,7 @@ impl TryFrom for Endian { } } +/// Possible ELF bit variants. #[derive(Debug, Clone, Copy)] enum Bit { Bit32, @@ -100,6 +165,7 @@ impl Into for Bit { } } +/// Possible ELF machine variants. #[derive(Debug, Clone, Copy)] pub enum Machine { X86_64, @@ -118,218 +184,273 @@ impl TryFrom for Machine { } } -struct ElfReader<'bytes> { - bytes: &'bytes [u8], - pos: usize, +/// Possible ELF program header variants. 
+#[derive(Clone, Copy)] +pub enum SegmentType { + Load, + Dynamic, + Interp, + Note, + Phdr, + Unknown(u32), } -impl<'bytes> ElfReader<'bytes> { - const fn new(bytes: &'bytes [u8]) -> ElfReader<'_> { - ElfReader { bytes, pos: 0 } - } - - fn read_slice(&mut self, len: usize) -> Result<&'bytes [u8]> { - if let Some(bytes) = self.bytes.get(self.pos..self.pos + len) { - self.bump(len); - Ok(bytes) - } else { - Err(Error::OutOfBytes) +impl From for SegmentType { + fn from(v: u32) -> Self { + match v { + 1 => SegmentType::Load, + 2 => SegmentType::Dynamic, + 3 => SegmentType::Interp, + 6 => SegmentType::Phdr, + _ => SegmentType::Unknown(v), } } +} - fn read(&mut self, en: Endian) -> Result { - let bytes = self.bytes.get(self.pos..).ok_or(Error::OutOfBytes)?; +/// An ELF file program header and segment bytes. +pub struct Segment<'bytes> { + bytes: &'bytes [u8], + vaddr: u64, + paddr: u64, + filesz: u64, + memsz: u64, + flags: u32, + typ: SegmentType, +} - let val = match en { - Endian::Little => E::from_le_bytes(&bytes), - Endian::Big => E::from_be_bytes(&bytes), - }; +impl Segment<'_> { + /// Check if `addr` falls into the virtual address range covered by the segment. + #[inline] + pub const fn contains(&self, addr: u64) -> bool { + self.vaddr <= addr && addr < (self.vaddr + self.memsz) + } - if val.is_some() { - self.bump(E::N); - } + /// ELF segment raw bytes. + #[inline] + pub const fn bytes(&self) -> &'_ [u8] { + self.bytes + } - val.ok_or(Error::OutOfBytes) + /// ELF segment virtual address. + #[inline] + pub const fn vaddr(&self) -> u64 { + self.vaddr } - fn read_native(&mut self, en: Endian, bt: Bit) -> Result { - match bt { - Bit::Bit32 => self.read::(en).map(u64::from), - Bit::Bit64 => self.read::(en), - } + /// ELF segment physical address. + #[inline] + pub const fn paddr(&self) -> u64 { + self.paddr } + /// ELF segment zero padding size, i.e. `memsz - filesz`. 
#[inline] - fn bump(&mut self, inc: usize) { - self.pos += inc; + pub const fn zero_padding(&self) -> u64 { + self.memsz - self.filesz } + /// Indicate whether segment is `executable`. #[inline] - fn set_pos(&mut self, pos: usize) { - self.pos = pos; + pub const fn exec(&self) -> bool { + const PF_X: u32 = 1 << 0; + (self.flags & PF_X) != 0 } + /// Indicate whether segment is `writeable`. #[inline] - const fn pos(&self) -> usize { - self.pos + pub const fn write(&self) -> bool { + const PF_W: u32 = 1 << 1; + (self.flags & PF_W) != 0 } -} -#[derive(Debug, Clone, Copy)] -pub struct LoadSegment<'bytes> { - pub vaddr: u64, - pub bytes: &'bytes [u8], - pub zero_pad: u64, - pub x: bool, - pub w: bool, - pub r: bool, -} + /// Indicate whether segment is `readable`. + #[inline] + pub const fn read(&self) -> bool { + const PF_R: u32 = 1 << 2; + (self.flags & PF_R) != 0 + } -impl LoadSegment<'_> { + /// ELF segment type. #[inline] - pub fn contains(&self, addr: u64) -> bool { - let len = u64::try_from(self.bytes.len()).expect("segment byte len exceeds u64"); - self.vaddr <= addr && addr < (self.vaddr + len + self.zero_pad) + pub const fn typ(&self) -> SegmentType { + self.typ } } -#[derive(Debug)] -pub struct Elf<'bytes, const N: usize> { - machine: Machine, - entry: u64, - load_segments: [Option>; N], +/// Iterator type over ELF program header and segments. +struct SegmentIter<'bytes> { + reader: ByteReader<'bytes>, + bit: Bit, + endian: Endian, + phoff: usize, + phentsize: usize, + phnum: usize, + ph: usize, } -impl<'bytes, const N: usize> Elf<'bytes, N> { - pub fn parse(b: &'bytes [u8]) -> Result> { - let mut r = ElfReader::new(b); - - // - // Parse ELF header. - // - - if !matches!(r.read_slice(4), Ok(b"\x7fELF")) { - return Err(Error::WrongElfMagic); +impl<'bytes> SegmentIter<'bytes> { + /// Create a new [`SegmentIter`]. 
+ const fn new( + bytes: &'bytes [u8], + bit: Bit, + endian: Endian, + phoff: usize, + phentsize: usize, + phnum: usize, + ) -> Self { + SegmentIter { + reader: ByteReader::new(bytes), + bit, + endian, + phoff, + phentsize, + phnum, + ph: 0, } + } +} - let bit = r.read::(Endian::Little).map(Bit::try_from)??; - let en = r.read::(Endian::Little).map(Endian::try_from)??; - - // Consume rest of e_ident. - r.bump(10); - - let _type = r.read::(en)?; - let machine = r.read::(en).map(Machine::try_from)??; - let _version = r.read::(en)?; - let entry = r.read_native(en, bit)?; - let phoff = r.read_native(en, bit)?; - let _shoff = r.read_native(en, bit)?; - let _flags = r.read::(en)?; - let ehsize = r.read::(en)?; - let phentsize = r.read::(en)?; - let phnum = r.read::(en)?; - let _shentsize = r.read::(en)?; - let _shnum = r.read::(en)?; - let _shstrndf = r.read::(en)?; - - assert_eq!(r.pos(), usize::from(ehsize)); - - // - // Parse load program header. - // +impl<'bytes> Iterator for SegmentIter<'bytes> { + type Item = Segment<'bytes>; - let mut load_segments = [None; N]; - let mut load_segments_slice = &mut load_segments[..]; + /// Try to parse next ELF program header and segment bytes. + fn next(&mut self) -> Option { + if self.ph < self.phnum { + // Position byte reader at the start of the current program header. + let off = self.ph.checked_mul(self.phentsize)?; + let pos = off.checked_add(self.phoff)?; + self.reader.set_pos(pos); - const PT_LOAD: u32 = 1; - const PF_X: u32 = 1 << 0; - const PF_W: u32 = 1 << 1; - const PF_R: u32 = 1 << 2; + // Bump to the next program header. 
+ self.ph += 1; - let phoff = usize::try_from(phoff) - .map_err(|_| Error::TypeConversion("phoff does not fit into usize"))?; - - for ph in 0..phnum { - let off = ph - .checked_mul(phentsize) - .map(usize::from) - .ok_or(Error::TypeConversion("phdr offset does not fit into usize"))?; - let pos = phoff.checked_add(off).ok_or(Error::TypeConversion( - "phdr position does not fit into usize", - ))?; - r.set_pos(pos); - - // We only care about load segments. - if r.read::(en)? != PT_LOAD { - continue; - } + // Get some aliases. + let r = &mut self.reader; + let bit = self.bit; + let en = self.endian; + // Parse program header. + let typ = r.read::(en).map(SegmentType::from).ok()?; let mut flags = 0; - // Elf64 program header has flags field here. if matches!(bit, Bit::Bit64) { - flags = r.read::(en)? + flags = r.read::(en).ok()? } - let offset = r.read_native(en, bit)?; - let vaddr = r.read_native(en, bit)?; - let _paddr = r.read_native(en, bit)?; - let filesz = r.read_native(en, bit)?; - let memsz = r.read_native(en, bit)?; + let offset = r.read_native(en, bit).ok()?; + let vaddr = r.read_native(en, bit).ok()?; + let paddr = r.read_native(en, bit).ok()?; + let filesz = r.read_native(en, bit).ok()?; + let memsz = r.read_native(en, bit).ok()?; + debug_assert!(memsz >= filesz); // Elf32 program header has flags field here. if matches!(bit, Bit::Bit32) { - flags = r.read::(en)? + flags = r.read::(en).ok()? } - let _align = r.read_native(en, bit)?; + let _align = r.read_native(en, bit).ok()?; - let data_off = usize::try_from(offset) - .map_err(|_| Error::TypeConversion("file offset does not fit into usize"))?; - let data_len = usize::try_from(filesz) - .map_err(|_| Error::TypeConversion("file size does not fit into usize"))?; + let data_off = usize::try_from(offset).ok()?; + let data_len = usize::try_from(filesz).ok()?; - // Seek to start of PT_LOAD segment bytes. + // Seek to start of the segment bytes. r.set_pos(data_off); - // Get slice of PT_LOAD segment bytes. 
- let bytes = r.read_slice(data_len)?; - let x = (flags & PF_X) != 0; - let w = (flags & PF_W) != 0; - let r = (flags & PF_R) != 0; - - load_segments_slice = load_segments_slice - .split_first_mut() - .map(|(slot, rest)| { - *slot = Some(LoadSegment { - vaddr, - bytes, - zero_pad: memsz - filesz, - x, - w, - r, - }); - rest - }) - .ok_or(Error::OutOfLoadSegments)?; + // Get slice of segment bytes. + let bytes = r.read_slice(data_len).ok()?; + debug_assert_eq!(filesz, bytes.len() as u64); + + Some(Segment { + bytes, + vaddr, + paddr, + memsz, + filesz, + flags, + typ, + }) + } else { + None } + } +} + +/// An ELF file. +pub struct Elf<'bytes> { + bytes: &'bytes [u8], + bit: Bit, + endian: Endian, + machine: Machine, + entry: u64, + phoff: usize, + phentsize: usize, + phnum: usize, +} + +impl<'bytes> Elf<'bytes> { + /// Try to parse an [`Elf`] object from the `bytes` given. + pub fn parse(bytes: &'bytes [u8]) -> Result> { + let mut r = ByteReader::new(bytes); + + if !matches!(r.read_slice(4), Ok(b"\x7fELF")) { + return Err(Error::WrongElfMagic); + } + + let bit = r.read::(Endian::Little).map(Bit::try_from)??; + let endian = r.read::(Endian::Little).map(Endian::try_from)??; + + // Consume rest of e_ident. 
+ r.bump(10); + + let _type = r.read::(endian)?; + let machine = r.read::(endian).map(Machine::try_from)??; + let _version = r.read::(endian)?; + let entry = r.read_native(endian, bit)?; + let phoff = r.read_native(endian, bit).map(usize::try_from)?.unwrap(); + let _shoff = r.read_native(endian, bit)?; + let _flags = r.read::(endian)?; + let ehsize = r.read::(endian)?; + let phentsize = r.read::(endian).map(usize::try_from)?.unwrap(); + let phnum = r.read::(endian).map(usize::try_from)?.unwrap(); + let _shentsize = r.read::(endian)?; + let _shnum = r.read::(endian)?; + let _shstrndf = r.read::(endian)?; + + assert_eq!(r.pos(), usize::from(ehsize)); Ok(Elf { + bytes, + bit, + endian, machine, entry, - load_segments, + phoff, + phentsize, + phnum, }) } + /// Get the machine field from the ELF header. #[inline] - pub fn machine(&self) -> Machine { + pub const fn machine(&self) -> Machine { self.machine } + /// Get the virtual address of the `entrypoint` from the ELF header. #[inline] - pub fn entry(&self) -> u64 { + pub const fn entry(&self) -> u64 { self.entry } + /// Get an iterator of the program header segments of type `PT_LOAD`. #[inline] - pub fn load_segments(&self) -> impl Iterator> { - self.load_segments.iter().flatten() + pub fn load_segments(&self) -> impl Iterator> { + SegmentIter::new( + self.bytes, + self.bit, + self.endian, + self.phoff, + self.phentsize, + self.phnum, + ) + .filter(|s| matches!(s.typ(), SegmentType::Load)) } } -- cgit v1.2.3