From 786a195f8e81d4f7c0af2a82b9d458361d424a71 Mon Sep 17 00:00:00 2001
From: johannst
Date: Wed, 26 May 2021 00:21:06 +0200
Subject: minimal KVM abstraction + real mode guest example

---
 src/kvm.rs     |  51 +++++++++
 src/kvm_sys.rs | 258 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs     | 148 +++++++++++++++++++++++++++++++++
 src/vcpu.rs    | 109 ++++++++++++++++++++++
 src/vm.rs      |  53 ++++++++++
 5 files changed, 619 insertions(+)
 create mode 100644 src/kvm.rs
 create mode 100644 src/kvm_sys.rs
 create mode 100644 src/lib.rs
 create mode 100644 src/vcpu.rs
 create mode 100644 src/vm.rs

(limited to 'src')

diff --git a/src/kvm.rs b/src/kvm.rs
new file mode 100644
index 0000000..3522adc
--- /dev/null
+++ b/src/kvm.rs
@@ -0,0 +1,51 @@
+use std::fs;
+use std::io;
+use std::os::unix::io::FromRawFd;
+
+use crate::{libcret, ioctl, kvm_sys};
+use crate::vm::Vm;
+
+/// Handle to the KVM subsystem, backed by an open `/dev/kvm` file descriptor.
+pub struct Kvm {
+    kvm: fs::File,
+}
+
+impl Kvm {
+    /// Open `/dev/kvm` and check the API version reported by the kernel.
+    ///
+    /// # Errors
+    ///
+    /// Returns the OS error if `open` or the `KVM_GET_API_VERSION` ioctl fails.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the reported version differs from `kvm_sys::KVM_API_VERSION`.
+    pub fn new() -> io::Result<Kvm> {
+        let kvm = libcret(unsafe {
+            libc::open("/dev/kvm\0".as_ptr().cast(), libc::O_RDWR | libc::O_CLOEXEC)
+        })
+        .map(|fd| unsafe { fs::File::from_raw_fd(fd) })?;
+
+        assert_eq!(
+            kvm_sys::KVM_API_VERSION,
+            ioctl(&kvm, kvm_sys::KVM_GET_API_VERSION, 0)?
+        );
+
+        Ok(Kvm { kvm })
+    }
+
+    /// Query the size of the per-VCPU `mmap` region (`KVM_GET_VCPU_MMAP_SIZE`).
+    fn get_vcpu_mmap_size(&self) -> io::Result<usize> {
+        ioctl(&self.kvm, kvm_sys::KVM_GET_VCPU_MMAP_SIZE, 0).map(|size| size as usize)
+    }
+
+    /// Create a new VM (`KVM_CREATE_VM`) and hand it the VCPU mmap size needed for its VCPUs.
+    pub fn create_vm(&self) -> io::Result<Vm> {
+        let vm = ioctl(&self.kvm, kvm_sys::KVM_CREATE_VM, 0 /* machine id */)
+            .map(|fd| unsafe { fs::File::from_raw_fd(fd) })?;
+
+        let vcpu_mmap_size = self.get_vcpu_mmap_size()?;
+
+        Ok(Vm::new(vm, vcpu_mmap_size))
+    }
+}
diff --git a/src/kvm_sys.rs b/src/kvm_sys.rs
new file mode 100644
index 0000000..8d5c85b
--- /dev/null
+++ b/src/kvm_sys.rs
@@ -0,0 +1,258 @@
+#![allow(non_snake_case)]
+#![allow(non_camel_case_types)]
+#![allow(dead_code)]
+
+// Generated by `build.rs`.
+include!(concat!(env!("OUT_DIR"), "/kvm_constants.rs"));
+
+use std::fmt;
+
+/// Raw value of the x86 `RFLAGS` register; `Display` prints the `AC`, `IOPL` and `IF` bits.
+#[repr(C)]
+#[derive(Default, Debug)]
+pub struct Rflags(pub u64);
+
+/// General purpose registers, exchanged with the kernel via `KVM_GET_REGS`/`KVM_SET_REGS`.
+#[repr(C)]
+#[derive(Default, Debug)]
+pub struct kvm_regs {
+    pub rax: u64,
+    pub rbx: u64,
+    pub rcx: u64,
+    pub rdx: u64,
+    pub rsi: u64,
+    pub rdi: u64,
+    pub rsp: u64,
+    pub rbp: u64,
+    pub r8: u64,
+    pub r9: u64,
+    pub r10: u64,
+    pub r11: u64,
+    pub r12: u64,
+    pub r13: u64,
+    pub r14: u64,
+    pub r15: u64,
+    pub rip: u64,
+    pub rflags: Rflags,
+}
+
+/// Segment register description as used inside [`kvm_sregs`].
+#[repr(C)]
+#[derive(Default, Debug)]
+pub struct kvm_segment {
+    pub base: u64,
+    pub limit: u32,
+    pub selector: u16,
+    pub type_: u8,
+    pub present: u8,
+    pub dpl: u8,
+    pub db: u8,
+    pub s: u8,
+    pub l: u8,
+    pub g: u8,
+    pub avl: u8,
+    unusable: u8,
+    _padding: u8,
+}
+
+/// Descriptor table register (`gdt`/`idt`) as used inside [`kvm_sregs`].
+#[repr(C)]
+#[derive(Default, Debug)]
+pub struct kvm_dtable {
+    pub base: u64,
+    pub limit: u16,
+    _padding: [u16; 3],
+}
+
+/// Special registers, exchanged with the kernel via `KVM_GET_SREGS`/`KVM_SET_SREGS`.
+#[repr(C)]
+#[derive(Default, Debug)]
+pub struct kvm_sregs {
+    pub cs: kvm_segment,
+    pub ds: kvm_segment,
+    pub es: kvm_segment,
+    pub fs: kvm_segment,
+    pub gs: kvm_segment,
+    pub ss: kvm_segment,
+    pub tr: kvm_segment,
+    pub ldt: kvm_segment,
+    pub gdt: kvm_dtable,
+    pub idt: kvm_dtable,
+    pub cr0: u64,
+    cr2: u64,
+    pub cr3: u64,
+    pub cr4: u64,
+    pub cr8: u64,
+    pub effer: u64, // Presumably the kernel's `efer`; spelling kept for API compatibility.
+    pub apic_base: u64,
+    pub interrupt_bitmap: [u64; 4],
+}
+
+/// Argument of the `KVM_SET_USER_MEMORY_REGION` ioctl.
+#[repr(C)]
+#[derive(Default, Debug)]
+pub(crate) struct kvm_userspace_memory_region {
+    pub slot: u32,
+    pub flags: u32,
+    pub guest_phys_addr: u64,
+    pub memory_size: u64,
+    pub userspace_addr: u64,
+}
+
+/// Per-VCPU run state shared with the kernel through the `mmap`ed VCPU file descriptor.
+#[repr(C)]
+pub(crate) struct kvm_run {
+    request_interrupt_window: u8,
+    immediate_exit: u8,
+    padding1: [u8; 6],
+    pub exit_reason: u32,
+    ready_for_interrupt_injection: u8,
+    if_flag: u8,
+    flags: u16,
+    cr8: u64,
+    apic_base: u64,
+    pub inner: kvm_run_union,
+    kvm_valid_regs: u64,
+    kvm_dirty_regs: u64,
+    s: kvm_run_union_s,
+}
+
+/// Payload of a `KVM_EXIT_IO` exit; `data_offset` is relative to the start of `kvm_run`.
+#[repr(C)]
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct kvm_run_io {
+    pub direction: u8,
+    pub size: u8,
+    pub port: u16,
+    pub count: u32,
+    pub data_offset: u64,
+}
+
+/// Payload of a `KVM_EXIT_MMIO` exit.
+#[repr(C)]
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct kvm_run_mmio {
+    pub phys_addr: u64,
+    pub data: [u8; 8],
+    pub len: u32,
+    pub is_write: u8,
+}
+
+// Only add the union fields used here.
+#[repr(C)]
+pub(crate) union kvm_run_union {
+    pub io: kvm_run_io,
+    pub mmio: kvm_run_mmio,
+    padding: [u8; 256],
+}
+
+// Only add the union fields used here.
+#[repr(C)]
+union kvm_run_union_s {
+    padding: [u8; 2048],
+}
+
+// {{{ Display : Rflags
+
+impl fmt::Display for Rflags {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // RFLAGS bit positions: IF = 9, IOPL = 12..=13, AC = 18 (bit 19 would be VIF).
+        let IF = || (self.0 >> 9) & 0b1;
+        let IOPL = || (self.0 >> 12) & 0b11;
+        let AC = || (self.0 >> 18) & 0b1;
+        write!(f, "AC({}) IOPL({}) IF({})", AC(), IOPL(), IF())
+    }
+}
+
+// }}}
+// {{{ Display : kvm_regs
+
+impl fmt::Display for kvm_regs {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "rax: {:#018x} rbx: {:#018x} rcx: {:#018x} rdx: {:#018x}\n\
+             rsi: {:#018x} rdi: {:#018x}\n\
+             r8 : {:#018x} r9 : {:#018x} r10: {:#018x} r11: {:#018x}\n\
+             r12: {:#018x} r13: {:#018x} r14: {:#018x} r15: {:#018x}\n\
+             rsp: {:#018x} rbp: {:#018x}\n\
+             rip: {:#018x} rflags: {:#018x} [{}]",
+            self.rax,
+            self.rbx,
+            self.rcx,
+            self.rdx,
+            self.rsi,
+            self.rdi,
+            self.r8,
+            self.r9,
+            self.r10,
+            self.r11,
+            self.r12,
+            self.r13,
+            self.r14,
+            self.r15,
+            self.rsp,
+            self.rbp,
+            self.rip,
+            self.rflags.0,
+            self.rflags
+        )
+    }
+}
+
+// }}}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::mem;
+
+    #[test]
+    fn check_kvm_regs() {
+        assert_eq!(mem::size_of::<kvm_regs>(), TEST_KVM_REGS_SIZE);
+        assert_eq!(mem::align_of::<kvm_regs>(), TEST_KVM_REGS_ALIGN);
+    }
+
+    #[test]
+    fn check_kvm_segment() {
+        assert_eq!(mem::size_of::<kvm_segment>(), TEST_KVM_SEGMENT_SIZE);
+        assert_eq!(mem::align_of::<kvm_segment>(), TEST_KVM_SEGMENT_ALIGN);
+    }
+
+    #[test]
+    fn check_kvm_dtable() {
+        assert_eq!(mem::size_of::<kvm_dtable>(), TEST_KVM_DTABLE_SIZE);
+        assert_eq!(mem::align_of::<kvm_dtable>(), TEST_KVM_DTABLE_ALIGN);
+    }
+
+    #[test]
+    fn check_kvm_sregs() {
+        assert_eq!(mem::size_of::<kvm_sregs>(), TEST_KVM_SREGS_SIZE);
+        assert_eq!(mem::align_of::<kvm_sregs>(), TEST_KVM_SREGS_ALIGN);
+        assert_eq!(
+            mem::size_of_val(&kvm_sregs::default().interrupt_bitmap),
+            TEST_KVM_SREGS_INTERRTUP_BITMAP_SIZE
+        );
+    }
+
+    #[test]
+    fn check_kvm_userspace_memory_region() {
+        assert_eq!(
+            mem::size_of::<kvm_userspace_memory_region>(),
+            TEST_KVM_USERSPACE_MEMORY_REGION_SIZE
+        );
+        assert_eq!(
+            mem::align_of::<kvm_userspace_memory_region>(),
+            TEST_KVM_USERSPACE_MEMORY_REGION_ALIGN
+        );
+    }
+
+    #[test]
+    fn check_kvm_run() {
+        assert_eq!(mem::size_of::<kvm_run>(), TEST_KVM_RUN_SIZE);
+        assert_eq!(mem::align_of::<kvm_run>(), TEST_KVM_RUN_ALIGN);
+        assert_eq!(mem::size_of::<kvm_run_io>(), TEST_KVM_RUN_IO_SIZE);
+        assert_eq!(mem::size_of::<kvm_run_mmio>(), TEST_KVM_RUN_MMIO_SIZE);
+        assert_eq!(mem::size_of::<kvm_run_union_s>(), TEST_KVM_RUN_UNION_S_SIZE);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..6793272
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,148 @@
+use std::convert::{AsMut, AsRef};
+use std::io;
+use std::ops;
+use std::os::unix::io::AsRawFd;
+
+pub mod kvm;
+pub mod kvm_sys;
+pub mod vcpu;
+pub mod vm;
+
+/// Strong type representing physical addresses.
+pub struct PhysAddr(pub u64);
+
+/// Helper to turn libc return values into an [io::Result](std::io::Result). Returns
+/// [`Error::last_os_error`](std::io::Error::last_os_error) if `ret < 0`.
+fn libcret(ret: libc::c_int) -> io::Result<libc::c_int> {
+    if ret < 0 {
+        Err(io::Error::last_os_error())
+    } else {
+        Ok(ret)
+    }
+}
+
+/// Wrapper of `libc::ioctl` for KVM ioctls with one argument and returning an
+/// [`io::Result`](std::io::Result).
+fn ioctl<F: AsRawFd>(fd: &F, cmd: u64, arg: u64) -> io::Result<libc::c_int> {
+    libcret(unsafe { libc::ioctl(fd.as_raw_fd(), cmd, arg) })
+}
+
+/// Wrapper to safely allocate memory for guest VMs.
+///
+/// The underlying memory is freed automatically once the `UserMem` instance is dropped.
+///
+/// Memory can be mapped into a guest VM with
+/// [`Vm::set_user_memory_region`](crate::vm::Vm::set_user_memory_region).
+pub struct UserMem {
+    ptr: *mut u8,
+    len: usize,
+}
+
+impl UserMem {
+    /// Allocate a zero-initialized memory region of `len` bytes.
+    pub fn new(len: usize) -> io::Result<UserMem> {
+        let ptr = unsafe {
+            libc::mmap(
+                std::ptr::null_mut(),
+                len,
+                libc::PROT_READ | libc::PROT_WRITE,
+                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
+                -1,
+                0,
+            )
+        };
+
+        if ptr == libc::MAP_FAILED {
+            Err(io::Error::last_os_error())
+        } else {
+            Ok(UserMem {
+                ptr: ptr.cast(),
+                len,
+            })
+        }
+    }
+
+    /// Allocate a zero-initialized memory region of `len` bytes and initialize the first bytes
+    /// with the contents of `init_from`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `init_from` is larger than the memory size `len`.
+    pub fn with_init(len: usize, init_from: &[u8]) -> io::Result<UserMem> {
+        assert!(len >= init_from.len());
+
+        let mut m = UserMem::new(len)?;
+        m.as_mut()[..init_from.len()].copy_from_slice(init_from);
+        Ok(m)
+    }
+}
+
+impl ops::Drop for UserMem {
+    /// Free underlying memory.
+    fn drop(&mut self) {
+        unsafe { libc::munmap(self.ptr.cast(), self.len) };
+    }
+}
+
+impl AsRef<[u8]> for UserMem {
+    fn as_ref(&self) -> &[u8] {
+        // SAFETY: `ptr`/`len` describe the mapping created in `new`, unmapped only in `drop`.
+        unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
+    }
+}
+
+impl AsMut<[u8]> for UserMem {
+    fn as_mut(&mut self) -> &mut [u8] {
+        // SAFETY: `ptr`/`len` describe the mapping created in `new`, unmapped only in `drop`.
+        unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) }
+    }
+}
+
+/// Internal wrapper to automatically `mmap` and `munmap` the [`struct kvm_run`][kvm_run]
+/// for a given VCPU.
+///
+/// [kvm_run]: https://www.kernel.org/doc/html/latest/virt/kvm/api.html#the-kvm-run-structure
+struct KvmRun {
+    ptr: *mut kvm_sys::kvm_run,
+    len: usize,
+}
+
+impl KvmRun {
+    /// Mmap the `struct kvm_run` for a given `VCPU` referenced by the argument file descriptor
+    /// `vcpu`.
+    fn new<F: AsRawFd>(vcpu: &F, len: usize) -> io::Result<KvmRun> {
+        let ptr = unsafe {
+            libc::mmap(
+                std::ptr::null_mut(),
+                len,
+                libc::PROT_READ,
+                libc::MAP_SHARED,
+                vcpu.as_raw_fd(),
+                0,
+            )
+        };
+
+        if ptr == libc::MAP_FAILED {
+            Err(io::Error::last_os_error())
+        } else {
+            Ok(KvmRun {
+                ptr: ptr.cast(),
+                len,
+            })
+        }
+    }
+}
+
+impl ops::Drop for KvmRun {
+    /// Munmap the mmaped `struct kvm_run`.
+    fn drop(&mut self) {
+        unsafe { libc::munmap(self.ptr.cast(), self.len) };
+    }
+}
+
+impl AsRef<kvm_sys::kvm_run> for KvmRun {
+    fn as_ref(&self) -> &kvm_sys::kvm_run {
+        // SAFETY: `ptr` is the mapping created in `new`, which stays mapped until `drop`.
+        unsafe { &(*self.ptr) }
+    }
+}
diff --git a/src/vcpu.rs b/src/vcpu.rs
new file mode 100644
index 0000000..988bedc
--- /dev/null
+++ b/src/vcpu.rs
@@ -0,0 +1,109 @@
+use std::fs;
+use std::io;
+
+use crate::{ioctl, kvm_sys, KvmRun};
+
+/// Reason why [`Vcpu::run`] returned, together with the data written by the guest.
+pub enum KvmExit<'cpu> {
+    Halt,
+    IoOut(u16, &'cpu [u8]),
+    MmioWrite(u64, &'cpu [u8]),
+}
+
+/// A virtual CPU: its file descriptor plus the mapped `kvm_run` area.
+pub struct Vcpu {
+    vcpu: fs::File,
+    kvm_run: KvmRun,
+}
+
+impl Vcpu {
+    pub(crate) fn new(vcpu: fs::File, kvm_run: KvmRun) -> Vcpu {
+        Vcpu { vcpu, kvm_run }
+    }
+
+    /// Read the general purpose registers (`KVM_GET_REGS`).
+    pub fn get_regs(&self) -> io::Result<kvm_sys::kvm_regs> {
+        let mut regs = kvm_sys::kvm_regs::default();
+        ioctl(
+            &self.vcpu,
+            kvm_sys::KVM_GET_REGS,
+            &mut regs as *mut _ as u64,
+        )?;
+        Ok(regs)
+    }
+
+    /// Write the general purpose registers (`KVM_SET_REGS`).
+    pub fn set_regs(&self, regs: kvm_sys::kvm_regs) -> io::Result<()> {
+        ioctl(&self.vcpu, kvm_sys::KVM_SET_REGS, &regs as *const _ as u64).map(|_| ())
+    }
+
+    /// Read the special registers (`KVM_GET_SREGS`).
+    pub fn get_sregs(&self) -> io::Result<kvm_sys::kvm_sregs> {
+        let mut sregs = kvm_sys::kvm_sregs::default();
+        ioctl(
+            &self.vcpu,
+            kvm_sys::KVM_GET_SREGS,
+            &mut sregs as *mut _ as u64,
+        )?;
+        Ok(sregs)
+    }
+
+    /// Write the special registers (`KVM_SET_SREGS`).
+    pub fn set_sregs(&self, sregs: kvm_sys::kvm_sregs) -> io::Result<()> {
+        ioctl(
+            &self.vcpu,
+            kvm_sys::KVM_SET_SREGS,
+            &sregs as *const _ as u64,
+        )
+        .map(|_| ())
+    }
+
+    /// Enter the guest (`KVM_RUN`) and decode why it exited.
+    ///
+    /// # Panics
+    ///
+    /// Panics on port reads, MMIO reads and every exit reason other than `KVM_EXIT_HLT`,
+    /// `KVM_EXIT_IO` and `KVM_EXIT_MMIO`, as these are not implemented yet.
+    pub fn run(&self) -> io::Result<KvmExit> {
+        ioctl(&self.vcpu, kvm_sys::KVM_RUN, 0)?;
+
+        let kvm_run = self.kvm_run.as_ref();
+
+        match kvm_run.exit_reason as u64 {
+            kvm_sys::KVM_EXIT_HLT => Ok(KvmExit::Halt),
+            kvm_sys::KVM_EXIT_IO => {
+                // Safe to use union `io` field, as Kernel instructed us to.
+                let io = unsafe { kvm_run.inner.io };
+
+                let kvm_run_ptr = kvm_run as *const kvm_sys::kvm_run as *const u8;
+
+                // Create IO buffer located at `kvm_run + io.data_offset`.
+                let data = unsafe {
+                    std::slice::from_raw_parts(
+                        kvm_run_ptr.offset(io.data_offset as isize),
+                        io.count /* num blocks */ as usize * io.size /* bytes per block */ as usize,
+                    )
+                };
+
+                match io.direction as u64 {
+                    kvm_sys::KVM_EXIT_IO_IN => todo!("KVM_EXIT_IO_IN not implemented!"),
+                    kvm_sys::KVM_EXIT_IO_OUT => Ok(KvmExit::IoOut(io.port, data)),
+                    _ => unreachable!(),
+                }
+            }
+            kvm_sys::KVM_EXIT_MMIO => {
+                // Safe to use union `mmio` field, as Kernel instructed us to.
+                let mmio = unsafe { &kvm_run.inner.mmio };
+                let len = mmio.len as usize;
+
+                // Only support write at the moment.
+                assert_ne!(0, mmio.is_write);
+
+                Ok(KvmExit::MmioWrite(mmio.phys_addr, &mmio.data[..len]))
+            }
+            r => {
+                todo!("KVM_EXIT_... (exit_reason={}) not implemented!", r)
+            }
+        }
+    }
+}
diff --git a/src/vm.rs b/src/vm.rs
new file mode 100644
index 0000000..6f8a355
--- /dev/null
+++ b/src/vm.rs
@@ -0,0 +1,53 @@
+use std::fs;
+use std::io;
+use std::os::unix::io::FromRawFd;
+
+use crate::vcpu::Vcpu;
+use crate::{ioctl, kvm_sys, KvmRun, PhysAddr, UserMem};
+
+/// A virtual machine: its file descriptor plus the VCPU mmap size used by `create_vpcu`.
+pub struct Vm {
+    vm: fs::File,
+    vcpu_mmap_size: usize,
+}
+
+impl Vm {
+    pub(crate) fn new(vm: fs::File, vcpu_mmap_size: usize) -> Vm {
+        Vm { vm, vcpu_mmap_size }
+    }
+
+    /// Map `mem` into the guest at guest physical address `phys_addr`, using memory slot 0.
+    ///
+    /// # Safety
+    ///
+    /// Only the raw address and length of `mem` are passed on, nothing keeps `mem` borrowed.
+    /// Presumably the caller must keep `mem` alive while the guest may use the region.
+    pub unsafe fn set_user_memory_region(
+        &self,
+        phys_addr: PhysAddr,
+        mem: &UserMem,
+    ) -> io::Result<()> {
+        // Create guest physical memory mapping for `slot : 0` (the default) at `phys_addr`.
+        let mut kvm_mem = kvm_sys::kvm_userspace_memory_region::default();
+        kvm_mem.userspace_addr = mem.ptr as u64;
+        kvm_mem.memory_size = mem.len as u64;
+        kvm_mem.guest_phys_addr = phys_addr.0;
+
+        ioctl(
+            &self.vm,
+            kvm_sys::KVM_SET_USER_MEMORY_REGION,
+            &kvm_mem as *const _ as u64,
+        )
+        .map(|_| ())
+    }
+
+    /// Create the VCPU with the given `id` (`KVM_CREATE_VCPU`) and map its `kvm_run` area.
+    pub fn create_vpcu(&self, id: u64) -> io::Result<Vcpu> {
+        let vcpu = ioctl(&self.vm, kvm_sys::KVM_CREATE_VCPU, id)
+            .map(|fd| unsafe { fs::File::from_raw_fd(fd) })?;
+
+        let kvm_run = KvmRun::new(&vcpu, self.vcpu_mmap_size)?;
+
+        Ok(Vcpu::new(vcpu, kvm_run))
+    }
+}
-- 
cgit v1.2.3