From 2b5ad8bc5fbaca0a5cb4f5e7a030146e0d0ff3cd Mon Sep 17 00:00:00 2001 From: tommady Date: Wed, 13 Nov 2024 06:42:25 +0000 Subject: [PATCH 01/15] fix: Refactor vmm builder code to simplify logic pass vm_config to eliminate two extra arguments derived from it Signed-off-by: tommady --- src/vmm/src/builder.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 1f52fdad063..a94dc826971 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -154,8 +154,7 @@ fn create_vmm_and_vcpus( event_manager: &mut EventManager, guest_memory: GuestMemoryMmap, uffd: Option, - track_dirty_pages: bool, - vcpu_count: u8, + vm_config: &VmConfig, kvm_capabilities: Vec, ) -> Result<(Vmm, Vec), StartMicrovmError> { use self::StartMicrovmError::*; @@ -165,7 +164,7 @@ fn create_vmm_and_vcpus( let mut vm = Vm::new(kvm_capabilities) .map_err(VmmError::Vm) .map_err(StartMicrovmError::Internal)?; - vm.memory_init(&guest_memory, track_dirty_pages) + vm.memory_init(&guest_memory, vm_config.track_dirty_pages) .map_err(VmmError::Vm) .map_err(StartMicrovmError::Internal)?; @@ -186,7 +185,7 @@ fn create_vmm_and_vcpus( #[cfg(target_arch = "x86_64")] let (vcpus, pio_device_manager) = { setup_interrupt_controller(&mut vm)?; - let vcpus = create_vcpus(&vm, vcpu_count, &vcpus_exit_evt).map_err(Internal)?; + let vcpus = create_vcpus(&vm, vm_config.vcpu_count, &vcpus_exit_evt).map_err(Internal)?; // Make stdout non blocking. set_stdout_nonblocking(); @@ -279,8 +278,7 @@ pub fn build_microvm_for_boot( event_manager, guest_memory, None, - vm_resources.vm_config.track_dirty_pages, - vm_resources.vm_config.vcpu_count, + &vm_resources.vm_config, cpu_template.kvm_capabilities.clone(), )?; @@ -469,8 +467,7 @@ pub fn build_microvm_from_snapshot( event_manager, guest_memory.clone(), uffd, - vm_resources.vm_config.track_dirty_pages, - vm_resources.vm_config.vcpu_count, + &vm_resources.vm_config, microvm_state.vm_state.kvm_cap_modifiers.clone(), )?; From 0093165e6fa8554e14bafcaff950ac34a2b98b01 Mon Sep 17 00:00:00 2001 From: tommady Date: Thu, 14 Nov 2024 04:20:29 +0000 Subject: [PATCH 02/15] fix: Refactor vmm builder code to simplify logic remove cfg_attr and extract create_vcpus from create_vmm_and_vcpus Signed-off-by: tommady --- src/vmm/src/builder.rs | 67 ++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index a94dc826971..096e6856188 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -148,7 +148,6 @@ impl std::convert::From for StartMicrovmError { } } -#[cfg_attr(target_arch = "aarch64", allow(unused))] fn create_vmm_and_vcpus( instance_info: &InstanceInfo, event_manager: &mut EventManager, @@ -179,17 +178,8 @@ fn create_vmm_and_vcpus( // Instantiate ACPI device manager. let acpi_device_manager = ACPIDeviceManager::new(); - - // For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS` - // while on aarch64 we need to do it the other way around. #[cfg(target_arch = "x86_64")] - let (vcpus, pio_device_manager) = { - setup_interrupt_controller(&mut vm)?; - let vcpus = create_vcpus(&vm, vm_config.vcpu_count, &vcpus_exit_evt).map_err(Internal)?; - - // Make stdout non blocking. - set_stdout_nonblocking(); - + let pio_device_manager = { // Serial device setup. let serial_device = setup_serial_device(event_manager, std::io::stdin(), io::stdout()).map_err(Internal)?; @@ -208,19 +198,10 @@ fn create_vmm_and_vcpus( pio_dev_mgr }; - (vcpus, pio_device_manager) + pio_device_manager }; - // On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) before setting up the - // IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP - // was already initialized. - // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. - #[cfg(target_arch = "aarch64")] - let vcpus = { - let vcpus = create_vcpus(&vm, vcpu_count, &vcpus_exit_evt).map_err(Internal)?; - setup_interrupt_controller(&mut vm, vcpu_count)?; - vcpus - }; + let vcpus = create_vcpus(&mut vm, vm_config.vcpu_count, &vcpus_exit_evt).map_err(Internal)?; let vmm = Vmm { events_observer: Some(std::io::stdin()), @@ -659,18 +640,14 @@ where /// Sets up the irqchip for a x86_64 microVM. #[cfg(target_arch = "x86_64")] -pub fn setup_interrupt_controller(vm: &mut Vm) -> Result<(), StartMicrovmError> { - vm.setup_irqchip() - .map_err(VmmError::Vm) - .map_err(StartMicrovmError::Internal) +pub fn setup_interrupt_controller(vm: &mut Vm) -> Result<(), VmmError> { + vm.setup_irqchip().map_err(VmmError::Vm) } /// Sets up the irqchip for a aarch64 microVM. #[cfg(target_arch = "aarch64")] -pub fn setup_interrupt_controller(vm: &mut Vm, vcpu_count: u8) -> Result<(), StartMicrovmError> { - vm.setup_irqchip(vcpu_count) - .map_err(VmmError::Vm) - .map_err(StartMicrovmError::Internal) +pub fn setup_interrupt_controller(vm: &mut Vm, vcpu_count: u8) -> Result<(), VmmError> { + vm.setup_irqchip(vcpu_count).map_err(VmmError::Vm) } /// Sets up the serial device. @@ -730,13 +707,39 @@ fn attach_legacy_devices_aarch64( .map_err(VmmError::RegisterMMIODevice) } -fn create_vcpus(vm: &Vm, vcpu_count: u8, exit_evt: &EventFd) -> Result, VmmError> { +// For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS` +// while on aarch64 we need to do it the other way around. +#[cfg(target_arch = "x86_64")] +fn create_vcpus(vm: &mut Vm, vcpu_count: u8, exit_evt: &EventFd) -> Result, VmmError> { + setup_interrupt_controller(vm)?; + + let mut vcpus = Vec::with_capacity(vcpu_count as usize); + for cpu_idx in 0..vcpu_count { + let exit_evt = exit_evt.try_clone().map_err(VmmError::EventFd)?; + let vcpu = Vcpu::new(cpu_idx, vm, exit_evt).map_err(VmmError::VcpuCreate)?; + vcpus.push(vcpu); + } + // Make stdout non blocking. + set_stdout_nonblocking(); + + Ok(vcpus) +} + +// On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) before setting up the +// IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP +// was already initialized. +// Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. +#[cfg(target_arch = "aarch64")] +fn create_vcpus(vm: &mut Vm, vcpu_count: u8, exit_evt: &EventFd) -> Result, VmmError> { let mut vcpus = Vec::with_capacity(vcpu_count as usize); for cpu_idx in 0..vcpu_count { let exit_evt = exit_evt.try_clone().map_err(VmmError::EventFd)?; let vcpu = Vcpu::new(cpu_idx, vm, exit_evt).map_err(VmmError::VcpuCreate)?; vcpus.push(vcpu); } + + setup_interrupt_controller(vm, vcpu_count)?; + Ok(vcpus) } @@ -1362,7 +1365,7 @@ pub mod tests { #[cfg(target_arch = "x86_64")] setup_interrupt_controller(&mut vm).unwrap(); - let vcpu_vec = create_vcpus(&vm, vcpu_count, &evfd).unwrap(); + let vcpu_vec = create_vcpus(&mut vm, vcpu_count, &evfd).unwrap(); assert_eq!(vcpu_vec.len(), vcpu_count as usize); } From 0bf17fb22d8e1e9ffb3dbf9bf6c3edb76b542ff8 Mon Sep 17 00:00:00 2001 From: tommady Date: Fri, 15 Nov 2024 21:13:39 +0000 Subject: [PATCH 03/15] fix: Refactor vmm builder code to simplify logic extract codes into two architecture specific modes Signed-off-by: tommady --- src/vmm/src/builder.rs | 620 ++++++++++++++++----------- src/vmm/src/device_manager/legacy.rs | 2 +- src/vmm/src/device_manager/mmio.rs | 12 +- 3 files changed, 382 insertions(+), 252 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 096e6856188..d52db6e6424 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -148,19 +148,342 @@ impl std::convert::From for StartMicrovmError { } } -fn create_vmm_and_vcpus( +#[cfg(target_arch = "aarch64")] +pub mod aarch64 { + use super::*; + + fn attach_legacy_devices_aarch64( + event_manager: &mut EventManager, + vmm: &mut Vmm, + cmdline: &mut LoaderKernelCmdline, + ) -> Result<(), VmmError> { + // Serial device setup. + let cmdline_contains_console = cmdline + .as_cstring() + .map_err(|_| VmmError::Cmdline)? + .into_string() + .map_err(|_| VmmError::Cmdline)? + .contains("console="); + + if cmdline_contains_console { + // Make stdout non-blocking. + set_stdout_nonblocking(); + let serial = setup_serial_device(event_manager, std::io::stdin(), std::io::stdout())?; + vmm.mmio_device_manager + .register_mmio_serial(vmm.vm.fd(), &mut vmm.resource_allocator, serial, None) + .map_err(VmmError::RegisterMMIODevice)?; + vmm.mmio_device_manager + .add_mmio_serial_to_cmdline(cmdline) + .map_err(VmmError::RegisterMMIODevice)?; + } + + let rtc = RTCDevice(Rtc::with_events( + &crate::devices::legacy::rtc_pl031::METRICS, + )); + vmm.mmio_device_manager + .register_mmio_rtc(&mut vmm.resource_allocator, rtc, None) + .map_err(VmmError::RegisterMMIODevice) + } + + /// Configures the system for booting Linux. + pub fn configure_system_for_boot( + vmm: &mut Vmm, + vcpus: &mut [Vcpu], + vm_config: &VmConfig, + cpu_template: &CustomCpuTemplate, + entry_addr: GuestAddress, + initrd: &Option, + boot_cmdline: LoaderKernelCmdline, + ) -> Result<(), StartMicrovmError> { + use self::StartMicrovmError::*; + + // Construct the base CpuConfiguration to apply CPU template onto. + let cpu_config = { + use crate::arch::aarch64::regs::Aarch64RegisterVec; + use crate::arch::aarch64::vcpu::get_registers; + + for vcpu in vcpus.iter_mut() { + vcpu.kvm_vcpu + .init(&cpu_template.vcpu_features) + .map_err(VmmError::VcpuInit) + .map_err(Internal)?; + } + + let mut regs = Aarch64RegisterVec::default(); + get_registers(&vcpus[0].kvm_vcpu.fd, &cpu_template.reg_list(), &mut regs) + .map_err(GuestConfigError)?; + CpuConfiguration { regs } + }; + + // Apply CPU template to the base CpuConfiguration. + let cpu_config = CpuConfiguration::apply_template(cpu_config, cpu_template)?; + + let vcpu_config = VcpuConfig { + vcpu_count: vm_config.vcpu_count, + smt: vm_config.smt, + cpu_config, + }; + + // Configure vCPUs with normalizing and setting the generated CPU configuration. + for vcpu in vcpus.iter_mut() { + vcpu.kvm_vcpu + .configure(vmm.guest_memory(), entry_addr, &vcpu_config) + .map_err(VmmError::VcpuConfigure) + .map_err(Internal)?; + } + + let vcpu_mpidr = vcpus + .iter_mut() + .map(|cpu| cpu.kvm_vcpu.get_mpidr()) + .collect(); + let cmdline = boot_cmdline.as_cstring()?; + crate::arch::aarch64::configure_system( + &vmm.guest_memory, + cmdline, + vcpu_mpidr, + vmm.mmio_device_manager.get_device_info(), + vmm.vm.get_irqchip(), + &vmm.acpi_device_manager.vmgenid, + initrd, + ) + .map_err(ConfigureSystem)?; + + Ok(()) + } + + /// Sets up the irqchip for a aarch64 microVM. + pub fn setup_interrupt_controller(vm: &mut Vm, vcpu_count: u8) -> Result<(), VmmError> { + vm.setup_irqchip(vcpu_count).map_err(VmmError::Vm) + } + + /// On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) before setting up the + /// IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP + /// was already initialized. + /// Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. + pub fn create_vcpus( + vm: &mut Vm, + vcpu_count: u8, + exit_evt: &EventFd, + ) -> Result, VmmError> { + let mut vcpus = Vec::with_capacity(vcpu_count as usize); + for cpu_idx in 0..vcpu_count { + let exit_evt = exit_evt.try_clone().map_err(VmmError::EventFd)?; + let vcpu = Vcpu::new(cpu_idx, vm, exit_evt).map_err(VmmError::VcpuCreate)?; + vcpus.push(vcpu); + } + + setup_interrupt_controller(vm, vcpu_count)?; + + Ok(vcpus) + } + + pub fn load_kernel( + boot_config: &BootConfig, + guest_memory: &GuestMemoryMmap, + ) -> Result { + let mut kernel_file = boot_config + .kernel_file + .try_clone() + .map_err(|err| StartMicrovmError::Internal(VmmError::KernelFile(err)))?; + + let entry_addr = Loader::load::( + guest_memory, + Some(GuestAddress(crate::arch::get_kernel_start())), + &mut kernel_file, + None, + ) + .map_err(StartMicrovmError::KernelLoader)?; + + Ok(entry_addr.kernel_load) + } + + pub fn create_vmm_and_vcpus( + instance_info: &InstanceInfo, + event_manager: &mut EventManager, + guest_memory: GuestMemoryMmap, + uffd: Option, + vm_config: &VmConfig, + kvm_capabilities: Vec, + ) -> Result<(Vmm, Vec), StartMicrovmError> { + let mut vmm = build_vmm( + instance_info, + event_manager, + guest_memory, + uffd, + vm_config, + kvm_capabilities, + )?; + + let vcpus = + create_vcpus(&mut vm, vm_config.vcpu_count, &vmm.vcpus_exit_evt).map_err(Internal)?; + + Ok((vmm, vcpus)) + } +} + +#[cfg(target_arch = "x86_64")] +pub mod x86_64 { + use super::*; + + /// Configures the system for booting Linux. + pub fn configure_system_for_boot( + vmm: &mut Vmm, + vcpus: &mut [Vcpu], + vm_config: &VmConfig, + cpu_template: &CustomCpuTemplate, + entry_addr: GuestAddress, + initrd: &Option, + boot_cmdline: LoaderKernelCmdline, + ) -> Result<(), StartMicrovmError> { + use self::StartMicrovmError::*; + + // Construct the base CpuConfiguration to apply CPU template onto. + let cpu_config = { + use crate::cpu_config::x86_64::cpuid; + let cpuid = cpuid::Cpuid::try_from(vmm.vm.supported_cpuid().clone()) + .map_err(GuestConfigError::CpuidFromKvmCpuid)?; + let msrs = vcpus[0] + .kvm_vcpu + .get_msrs(cpu_template.msr_index_iter()) + .map_err(GuestConfigError::VcpuIoctl)?; + CpuConfiguration { cpuid, msrs } + }; + + // Apply CPU template to the base CpuConfiguration. + let cpu_config = CpuConfiguration::apply_template(cpu_config, cpu_template)?; + + let vcpu_config = VcpuConfig { + vcpu_count: vm_config.vcpu_count, + smt: vm_config.smt, + cpu_config, + }; + + // Configure vCPUs with normalizing and setting the generated CPU configuration. + for vcpu in vcpus.iter_mut() { + vcpu.kvm_vcpu + .configure(vmm.guest_memory(), entry_addr, &vcpu_config) + .map_err(VmmError::VcpuConfigure) + .map_err(Internal)?; + } + + // Write the kernel command line to guest memory. This is x86_64 specific, since on + // aarch64 the command line will be specified through the FDT. + let cmdline_size = boot_cmdline + .as_cstring() + .map(|cmdline_cstring| cmdline_cstring.as_bytes_with_nul().len())?; + + linux_loader::loader::load_cmdline::( + vmm.guest_memory(), + GuestAddress(crate::arch::x86_64::layout::CMDLINE_START), + &boot_cmdline, + ) + .map_err(LoadCommandline)?; + crate::arch::x86_64::configure_system( + &vmm.guest_memory, + &mut vmm.resource_allocator, + crate::vstate::memory::GuestAddress(crate::arch::x86_64::layout::CMDLINE_START), + cmdline_size, + initrd, + vcpu_config.vcpu_count, + ) + .map_err(ConfigureSystem)?; + + // Create ACPI tables and write them in guest memory + // For the time being we only support ACPI in x86_64 + acpi::create_acpi_tables( + &vmm.guest_memory, + &mut vmm.resource_allocator, + &vmm.mmio_device_manager, + &vmm.acpi_device_manager, + vcpus, + )?; + + Ok(()) + } + + /// Sets up the irqchip for a x86_64 microVM. + pub fn setup_interrupt_controller(vm: &mut Vm) -> Result<(), VmmError> { + vm.setup_irqchip().map_err(VmmError::Vm) + } + + /// For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS` + /// while on aarch64 we need to do it the other way around. + pub fn create_vcpus( + vm: &mut Vm, + vcpu_count: u8, + exit_evt: &EventFd, + ) -> Result, VmmError> { + setup_interrupt_controller(vm)?; + + let mut vcpus = Vec::with_capacity(vcpu_count as usize); + for cpu_idx in 0..vcpu_count { + let exit_evt = exit_evt.try_clone().map_err(VmmError::EventFd)?; + let vcpu = Vcpu::new(cpu_idx, vm, exit_evt).map_err(VmmError::VcpuCreate)?; + vcpus.push(vcpu); + } + // Make stdout non blocking. + set_stdout_nonblocking(); + + Ok(vcpus) + } + + pub fn load_kernel( + boot_config: &BootConfig, + guest_memory: &GuestMemoryMmap, + ) -> Result { + let mut kernel_file = boot_config + .kernel_file + .try_clone() + .map_err(|err| StartMicrovmError::Internal(VmmError::KernelFile(err)))?; + + let entry_addr = Loader::load::( + guest_memory, + None, + &mut kernel_file, + Some(GuestAddress(crate::arch::get_kernel_start())), + ) + .map_err(StartMicrovmError::KernelLoader)?; + + Ok(entry_addr.kernel_load) + } + + pub fn create_vmm_and_vcpus( + instance_info: &InstanceInfo, + event_manager: &mut EventManager, + guest_memory: GuestMemoryMmap, + uffd: Option, + vm_config: &VmConfig, + kvm_capabilities: Vec, + ) -> Result<(Vmm, Vec), StartMicrovmError> { + let mut vmm = build_vmm( + instance_info, + event_manager, + guest_memory, + uffd, + vm_config, + kvm_capabilities, + )?; + + let vcpus = create_vcpus(&mut vmm.vm, vm_config.vcpu_count, &vmm.vcpus_exit_evt) + .map_err(StartMicrovmError::Internal)?; + + Ok((vmm, vcpus)) + } +} + +fn build_vmm( instance_info: &InstanceInfo, event_manager: &mut EventManager, guest_memory: GuestMemoryMmap, uffd: Option, vm_config: &VmConfig, kvm_capabilities: Vec, -) -> Result<(Vmm, Vec), StartMicrovmError> { +) -> Result { use self::StartMicrovmError::*; // Set up Kvm Vm and register memory regions. // Build custom CPU config if a custom template is provided. - let mut vm = Vm::new(kvm_capabilities) + let vm = Vm::new(kvm_capabilities) .map_err(VmmError::Vm) .map_err(StartMicrovmError::Internal)?; vm.memory_init(&guest_memory, vm_config.track_dirty_pages) @@ -178,6 +501,9 @@ fn create_vmm_and_vcpus( // Instantiate ACPI device manager. let acpi_device_manager = ACPIDeviceManager::new(); + + // For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS` + // while on aarch64 we need to do it the other way around. #[cfg(target_arch = "x86_64")] let pio_device_manager = { // Serial device setup. @@ -201,9 +527,7 @@ fn create_vmm_and_vcpus( pio_device_manager }; - let vcpus = create_vcpus(&mut vm, vm_config.vcpu_count, &vcpus_exit_evt).map_err(Internal)?; - - let vmm = Vmm { + Ok(Vmm { events_observer: Some(std::io::stdin()), instance_info: instance_info.clone(), shutdown_exit_code: None, @@ -217,9 +541,7 @@ fn create_vmm_and_vcpus( #[cfg(target_arch = "x86_64")] pio_device_manager, acpi_device_manager, - }; - - Ok((vmm, vcpus)) + }) } /// Builds and starts a microVM based on the current Firecracker VmResources configuration. @@ -246,7 +568,11 @@ pub fn build_microvm_for_boot( .allocate_guest_memory() .map_err(StartMicrovmError::GuestMemory)?; - let entry_addr = load_kernel(boot_config, &guest_memory)?; + #[cfg(target_arch = "x86_64")] + let entry_addr = x86_64::load_kernel(boot_config, &guest_memory)?; + #[cfg(target_arch = "aarch64")] + let entry_addr = aarch64::load_kernel(boot_config, &guest_memory)?; + let initrd = load_initrd_from_config(boot_config, &guest_memory)?; // Clone the command-line so that a failed boot doesn't pollute the original. #[allow(unused_mut)] @@ -254,7 +580,17 @@ pub fn build_microvm_for_boot( let cpu_template = vm_resources.vm_config.cpu_template.get_cpu_template()?; - let (mut vmm, mut vcpus) = create_vmm_and_vcpus( + #[cfg(target_arch = "x86_64")] + let (mut vmm, mut vcpus) = x86_64::create_vmm_and_vcpus( + instance_info, + event_manager, + guest_memory, + None, + &vm_resources.vm_config, + cpu_template.kvm_capabilities.clone(), + )?; + #[cfg(target_arch = "aarch64")] + let (mut vmm, mut vcpus) = aarch64::create_vmm_and_vcpus( instance_info, event_manager, guest_memory, @@ -308,11 +644,23 @@ pub fn build_microvm_for_boot( } #[cfg(target_arch = "aarch64")] - attach_legacy_devices_aarch64(event_manager, &mut vmm, &mut boot_cmdline).map_err(Internal)?; + aarch::attach_legacy_devices_aarch64(event_manager, &mut vmm, &mut boot_cmdline) + .map_err(Internal)?; attach_vmgenid_device(&mut vmm)?; - configure_system_for_boot( + #[cfg(target_arch = "x86_64")] + x86_64::configure_system_for_boot( + &mut vmm, + vcpus.as_mut(), + &vm_resources.vm_config, + &cpu_template, + entry_addr, + &initrd, + boot_cmdline, + )?; + #[cfg(target_arch = "aarch64")] + aarch64::configure_system_for_boot( &mut vmm, vcpus.as_mut(), &vm_resources.vm_config, @@ -443,7 +791,17 @@ pub fn build_microvm_from_snapshot( ) -> Result>, BuildMicrovmFromSnapshotError> { // Build Vmm. debug!("event_start: build microvm from snapshot"); - let (mut vmm, mut vcpus) = create_vmm_and_vcpus( + #[cfg(target_arch = "x86_64")] + let (mut vmm, mut vcpus) = x86_64::create_vmm_and_vcpus( + instance_info, + event_manager, + guest_memory.clone(), + uffd, + &vm_resources.vm_config, + microvm_state.vm_state.kvm_cap_modifiers.clone(), + )?; + #[cfg(target_arch = "aarch64")] + let (mut vmm, mut vcpus) = aarch64::create_vmm_and_vcpus( instance_info, event_manager, guest_memory.clone(), @@ -545,36 +903,6 @@ pub fn build_microvm_from_snapshot( Ok(vmm) } -fn load_kernel( - boot_config: &BootConfig, - guest_memory: &GuestMemoryMmap, -) -> Result { - let mut kernel_file = boot_config - .kernel_file - .try_clone() - .map_err(|err| StartMicrovmError::Internal(VmmError::KernelFile(err)))?; - - #[cfg(target_arch = "x86_64")] - let entry_addr = Loader::load::( - guest_memory, - None, - &mut kernel_file, - Some(GuestAddress(crate::arch::get_kernel_start())), - ) - .map_err(StartMicrovmError::KernelLoader)?; - - #[cfg(target_arch = "aarch64")] - let entry_addr = Loader::load::( - guest_memory, - Some(GuestAddress(crate::arch::get_kernel_start())), - &mut kernel_file, - None, - ) - .map_err(StartMicrovmError::KernelLoader)?; - - Ok(entry_addr.kernel_load) -} - fn load_initrd_from_config( boot_cfg: &BootConfig, vm_memory: &GuestMemoryMmap, @@ -638,18 +966,6 @@ where }) } -/// Sets up the irqchip for a x86_64 microVM. -#[cfg(target_arch = "x86_64")] -pub fn setup_interrupt_controller(vm: &mut Vm) -> Result<(), VmmError> { - vm.setup_irqchip().map_err(VmmError::Vm) -} - -/// Sets up the irqchip for a aarch64 microVM. -#[cfg(target_arch = "aarch64")] -pub fn setup_interrupt_controller(vm: &mut Vm, vcpu_count: u8) -> Result<(), VmmError> { - vm.setup_irqchip(vcpu_count).map_err(VmmError::Vm) -} - /// Sets up the serial device. pub fn setup_serial_device( event_manager: &mut EventManager, @@ -673,192 +989,6 @@ pub fn setup_serial_device( Ok(serial) } -#[cfg(target_arch = "aarch64")] -fn attach_legacy_devices_aarch64( - event_manager: &mut EventManager, - vmm: &mut Vmm, - cmdline: &mut LoaderKernelCmdline, -) -> Result<(), VmmError> { - // Serial device setup. - let cmdline_contains_console = cmdline - .as_cstring() - .map_err(|_| VmmError::Cmdline)? - .into_string() - .map_err(|_| VmmError::Cmdline)? - .contains("console="); - - if cmdline_contains_console { - // Make stdout non-blocking. - set_stdout_nonblocking(); - let serial = setup_serial_device(event_manager, std::io::stdin(), std::io::stdout())?; - vmm.mmio_device_manager - .register_mmio_serial(vmm.vm.fd(), &mut vmm.resource_allocator, serial, None) - .map_err(VmmError::RegisterMMIODevice)?; - vmm.mmio_device_manager - .add_mmio_serial_to_cmdline(cmdline) - .map_err(VmmError::RegisterMMIODevice)?; - } - - let rtc = RTCDevice(Rtc::with_events( - &crate::devices::legacy::rtc_pl031::METRICS, - )); - vmm.mmio_device_manager - .register_mmio_rtc(&mut vmm.resource_allocator, rtc, None) - .map_err(VmmError::RegisterMMIODevice) -} - -// For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS` -// while on aarch64 we need to do it the other way around. -#[cfg(target_arch = "x86_64")] -fn create_vcpus(vm: &mut Vm, vcpu_count: u8, exit_evt: &EventFd) -> Result, VmmError> { - setup_interrupt_controller(vm)?; - - let mut vcpus = Vec::with_capacity(vcpu_count as usize); - for cpu_idx in 0..vcpu_count { - let exit_evt = exit_evt.try_clone().map_err(VmmError::EventFd)?; - let vcpu = Vcpu::new(cpu_idx, vm, exit_evt).map_err(VmmError::VcpuCreate)?; - vcpus.push(vcpu); - } - // Make stdout non blocking. - set_stdout_nonblocking(); - - Ok(vcpus) -} - -// On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) before setting up the -// IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP -// was already initialized. -// Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. -#[cfg(target_arch = "aarch64")] -fn create_vcpus(vm: &mut Vm, vcpu_count: u8, exit_evt: &EventFd) -> Result, VmmError> { - let mut vcpus = Vec::with_capacity(vcpu_count as usize); - for cpu_idx in 0..vcpu_count { - let exit_evt = exit_evt.try_clone().map_err(VmmError::EventFd)?; - let vcpu = Vcpu::new(cpu_idx, vm, exit_evt).map_err(VmmError::VcpuCreate)?; - vcpus.push(vcpu); - } - - setup_interrupt_controller(vm, vcpu_count)?; - - Ok(vcpus) -} - -/// Configures the system for booting Linux. -#[cfg_attr(target_arch = "aarch64", allow(unused))] -pub fn configure_system_for_boot( - vmm: &mut Vmm, - vcpus: &mut [Vcpu], - vm_config: &VmConfig, - cpu_template: &CustomCpuTemplate, - entry_addr: GuestAddress, - initrd: &Option, - boot_cmdline: LoaderKernelCmdline, -) -> Result<(), StartMicrovmError> { - use self::StartMicrovmError::*; - - // Construct the base CpuConfiguration to apply CPU template onto. - #[cfg(target_arch = "x86_64")] - let cpu_config = { - use crate::cpu_config::x86_64::cpuid; - let cpuid = cpuid::Cpuid::try_from(vmm.vm.supported_cpuid().clone()) - .map_err(GuestConfigError::CpuidFromKvmCpuid)?; - let msrs = vcpus[0] - .kvm_vcpu - .get_msrs(cpu_template.msr_index_iter()) - .map_err(GuestConfigError::VcpuIoctl)?; - CpuConfiguration { cpuid, msrs } - }; - - #[cfg(target_arch = "aarch64")] - let cpu_config = { - use crate::arch::aarch64::regs::Aarch64RegisterVec; - use crate::arch::aarch64::vcpu::get_registers; - - for vcpu in vcpus.iter_mut() { - vcpu.kvm_vcpu - .init(&cpu_template.vcpu_features) - .map_err(VmmError::VcpuInit) - .map_err(Internal)?; - } - - let mut regs = Aarch64RegisterVec::default(); - get_registers(&vcpus[0].kvm_vcpu.fd, &cpu_template.reg_list(), &mut regs) - .map_err(GuestConfigError)?; - CpuConfiguration { regs } - }; - - // Apply CPU template to the base CpuConfiguration. - let cpu_config = CpuConfiguration::apply_template(cpu_config, cpu_template)?; - - let vcpu_config = VcpuConfig { - vcpu_count: vm_config.vcpu_count, - smt: vm_config.smt, - cpu_config, - }; - - // Configure vCPUs with normalizing and setting the generated CPU configuration. - for vcpu in vcpus.iter_mut() { - vcpu.kvm_vcpu - .configure(vmm.guest_memory(), entry_addr, &vcpu_config) - .map_err(VmmError::VcpuConfigure) - .map_err(Internal)?; - } - - #[cfg(target_arch = "x86_64")] - { - // Write the kernel command line to guest memory. This is x86_64 specific, since on - // aarch64 the command line will be specified through the FDT. - let cmdline_size = boot_cmdline - .as_cstring() - .map(|cmdline_cstring| cmdline_cstring.as_bytes_with_nul().len())?; - - linux_loader::loader::load_cmdline::( - vmm.guest_memory(), - GuestAddress(crate::arch::x86_64::layout::CMDLINE_START), - &boot_cmdline, - ) - .map_err(LoadCommandline)?; - crate::arch::x86_64::configure_system( - &vmm.guest_memory, - &mut vmm.resource_allocator, - crate::vstate::memory::GuestAddress(crate::arch::x86_64::layout::CMDLINE_START), - cmdline_size, - initrd, - vcpu_config.vcpu_count, - ) - .map_err(ConfigureSystem)?; - - // Create ACPI tables and write them in guest memory - // For the time being we only support ACPI in x86_64 - acpi::create_acpi_tables( - &vmm.guest_memory, - &mut vmm.resource_allocator, - &vmm.mmio_device_manager, - &vmm.acpi_device_manager, - vcpus, - )?; - } - #[cfg(target_arch = "aarch64")] - { - let vcpu_mpidr = vcpus - .iter_mut() - .map(|cpu| cpu.kvm_vcpu.get_mpidr()) - .collect(); - let cmdline = boot_cmdline.as_cstring()?; - crate::arch::aarch64::configure_system( - &vmm.guest_memory, - cmdline, - vcpu_mpidr, - vmm.mmio_device_manager.get_device_info(), - vmm.vm.get_irqchip(), - &vmm.acpi_device_manager.vmgenid, - initrd, - ) - .map_err(ConfigureSystem)?; - } - Ok(()) -} - /// Attaches a VirtioDevice device to the device manager and event manager. fn attach_virtio_device( event_manager: &mut EventManager, @@ -1127,7 +1257,7 @@ pub mod tests { .unwrap(); #[cfg(target_arch = "x86_64")] - setup_interrupt_controller(&mut vm).unwrap(); + x86_64::setup_interrupt_controller(&mut vm).unwrap(); #[cfg(target_arch = "aarch64")] { @@ -1363,9 +1493,9 @@ pub mod tests { let evfd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); #[cfg(target_arch = "x86_64")] - setup_interrupt_controller(&mut vm).unwrap(); + x86_64::setup_interrupt_controller(&mut vm).unwrap(); - let vcpu_vec = create_vcpus(&mut vm, vcpu_count, &evfd).unwrap(); + let vcpu_vec = x86_64::create_vcpus(&mut vm, vcpu_count, &evfd).unwrap(); assert_eq!(vcpu_vec.len(), vcpu_count as usize); } diff --git a/src/vmm/src/device_manager/legacy.rs b/src/vmm/src/device_manager/legacy.rs index 45842d933b2..664f74bc06f 100644 --- a/src/vmm/src/device_manager/legacy.rs +++ b/src/vmm/src/device_manager/legacy.rs @@ -252,7 +252,7 @@ mod tests { let guest_mem = single_region_mem(0x1000); let mut vm = Vm::new(vec![]).unwrap(); vm.memory_init(&guest_mem, false).unwrap(); - crate::builder::setup_interrupt_controller(&mut vm).unwrap(); + crate::builder::x86_64::setup_interrupt_controller(&mut vm).unwrap(); let mut ldm = PortIODeviceManager::new( Arc::new(Mutex::new(BusDevice::Serial(SerialDevice { serial: Serial::with_events( diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index 00c155abcfd..774967e8933 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -669,9 +669,9 @@ mod tests { let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap(); let dummy = Arc::new(Mutex::new(DummyDevice::new())); #[cfg(target_arch = "x86_64")] - builder::setup_interrupt_controller(&mut vm).unwrap(); + builder::x86_64::setup_interrupt_controller(&mut vm).unwrap(); #[cfg(target_arch = "aarch64")] - builder::setup_interrupt_controller(&mut vm, 1).unwrap(); + builder::aarch64::setup_interrupt_controller(&mut vm, 1).unwrap(); device_manager .register_virtio_test_device( @@ -697,9 +697,9 @@ mod tests { let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap(); #[cfg(target_arch = "x86_64")] - builder::setup_interrupt_controller(&mut vm).unwrap(); + builder::x86_64::setup_interrupt_controller(&mut vm).unwrap(); #[cfg(target_arch = "aarch64")] - builder::setup_interrupt_controller(&mut vm, 1).unwrap(); + builder::aarch64::setup_interrupt_controller(&mut vm, 1).unwrap(); for _i in crate::arch::IRQ_BASE..=crate::arch::IRQ_MAX { device_manager @@ -750,9 +750,9 @@ mod tests { let mem_clone = guest_mem.clone(); #[cfg(target_arch = "x86_64")] - builder::setup_interrupt_controller(&mut vm).unwrap(); + builder::x86_64::setup_interrupt_controller(&mut vm).unwrap(); #[cfg(target_arch = "aarch64")] - builder::setup_interrupt_controller(&mut vm, 1).unwrap(); + builder::aarch64::setup_interrupt_controller(&mut vm, 1).unwrap(); let mut device_manager = MMIODeviceManager::new(); let mut resource_allocator = ResourceAllocator::new().unwrap(); From 6b97c074204a062c622dd91c25be2277a16b8dd5 Mon Sep 17 00:00:00 2001 From: tommady Date: Fri, 15 Nov 2024 21:38:03 +0000 Subject: [PATCH 04/15] fix: Refactor vmm builder code to simplify logic eliminate the unnecessary usage of the event_manager argument Signed-off-by: tommady --- src/vmm/src/builder.rs | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index d52db6e6424..4a3eb48e0fc 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -299,7 +299,6 @@ pub mod aarch64 { pub fn create_vmm_and_vcpus( instance_info: &InstanceInfo, - event_manager: &mut EventManager, guest_memory: GuestMemoryMmap, uffd: Option, vm_config: &VmConfig, @@ -307,7 +306,6 @@ pub mod aarch64 { ) -> Result<(Vmm, Vec), StartMicrovmError> { let mut vmm = build_vmm( instance_info, - event_manager, guest_memory, uffd, vm_config, @@ -449,7 +447,6 @@ pub mod x86_64 { pub fn create_vmm_and_vcpus( instance_info: &InstanceInfo, - event_manager: &mut EventManager, guest_memory: GuestMemoryMmap, uffd: Option, vm_config: &VmConfig, @@ -457,7 +454,6 @@ pub mod x86_64 { ) -> Result<(Vmm, Vec), StartMicrovmError> { let mut vmm = build_vmm( instance_info, - event_manager, guest_memory, uffd, vm_config, @@ -473,7 +469,6 @@ pub mod x86_64 { fn build_vmm( instance_info: &InstanceInfo, - event_manager: &mut EventManager, guest_memory: GuestMemoryMmap, uffd: Option, vm_config: &VmConfig, @@ -508,7 +503,7 @@ fn build_vmm( let pio_device_manager = { // Serial device setup. let serial_device = - setup_serial_device(event_manager, std::io::stdin(), io::stdout()).map_err(Internal)?; + setup_serial_device(std::io::stdin(), io::stdout()).map_err(Internal)?; // x86_64 uses the i8042 reset event as the Vmm exit event. let reset_evt = vcpus_exit_evt @@ -583,16 +578,17 @@ pub fn build_microvm_for_boot( #[cfg(target_arch = "x86_64")] let (mut vmm, mut vcpus) = x86_64::create_vmm_and_vcpus( instance_info, - event_manager, guest_memory, None, &vm_resources.vm_config, cpu_template.kvm_capabilities.clone(), )?; + #[cfg(target_arch = "x86_64")] + event_manager.add_subscriber(vmm.pio_device_manager.stdio_serial.clone()); + #[cfg(target_arch = "aarch64")] let (mut vmm, mut vcpus) = aarch64::create_vmm_and_vcpus( instance_info, - event_manager, guest_memory, None, &vm_resources.vm_config, @@ -794,16 +790,16 @@ pub fn build_microvm_from_snapshot( #[cfg(target_arch = "x86_64")] let (mut vmm, mut vcpus) = x86_64::create_vmm_and_vcpus( instance_info, - event_manager, guest_memory.clone(), uffd, &vm_resources.vm_config, microvm_state.vm_state.kvm_cap_modifiers.clone(), )?; + #[cfg(target_arch = "x86_64")] + event_manager.add_subscriber(vmm.pio_device_manager.stdio_serial.clone()); #[cfg(target_arch = "aarch64")] let (mut vmm, mut vcpus) = aarch64::create_vmm_and_vcpus( instance_info, - event_manager, guest_memory.clone(), uffd, &vm_resources.vm_config, @@ -968,7 +964,6 @@ where /// Sets up the serial device. pub fn setup_serial_device( - event_manager: &mut EventManager, input: std::io::Stdin, out: std::io::Stdout, ) -> Result>, VmmError> { @@ -985,7 +980,7 @@ pub fn setup_serial_device( ), input: Some(input), }))); - event_manager.add_subscriber(serial.clone()); + Ok(serial) } From 16cf6b03e4a8991fd82cca55bd9aeb4ab05a21cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Wed, 13 Nov 2024 10:54:57 +0100 Subject: [PATCH 05/15] test: test ARM CPU templates in Linux host 5.10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the SVE CPU template as a valid template in 5.10 since it works. Signed-off-by: Pablo Barbáchano --- tests/framework/utils_cpu_templates.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/framework/utils_cpu_templates.py b/tests/framework/utils_cpu_templates.py index 5badd7c640a..96bf197efda 100644 --- a/tests/framework/utils_cpu_templates.py +++ b/tests/framework/utils_cpu_templates.py @@ -42,18 +42,12 @@ def get_supported_cpu_templates(): SUPPORTED_CPU_TEMPLATES = get_supported_cpu_templates() -# Custom CPU templates for Aarch64 for testing -AARCH64_CUSTOM_CPU_TEMPLATES_G2 = ["v1n1"] -AARCH64_CUSTOM_CPU_TEMPLATES_G3 = [ - "aarch64_with_sve_and_pac", - "v1n1", -] - def get_supported_custom_cpu_templates(): """ Return the list of custom CPU templates supported by the platform. """ + # pylint:disable=too-many-return-statements host_linux = global_props.host_linux_version_tpl match get_cpu_vendor(), global_props.cpu_codename: @@ -65,9 +59,11 @@ def get_supported_custom_cpu_templates(): case CpuVendor.AMD, _: return AMD_TEMPLATES case CpuVendor.ARM, CpuModel.ARM_NEOVERSE_N1 if host_linux >= (6, 1): - return AARCH64_CUSTOM_CPU_TEMPLATES_G2 + return ["v1n1"] case CpuVendor.ARM, CpuModel.ARM_NEOVERSE_V1 if host_linux >= (6, 1): - return AARCH64_CUSTOM_CPU_TEMPLATES_G3 + return ["v1n1", "aarch64_with_sve_and_pac"] + case CpuVendor.ARM, CpuModel.ARM_NEOVERSE_V1: + return ["aarch64_with_sve_and_pac"] case _: return [] From 6221debf075e0efd5196cf7a0c5d79e28a073ba5 Mon Sep 17 00:00:00 2001 From: Jack Thomson Date: Wed, 13 Nov 2024 11:53:26 +0000 Subject: [PATCH 06/15] chore: Update to v1.10.1 patch Update release policy to v1.10.1 patch Signed-off-by: Jack Thomson --- docs/RELEASE_POLICY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/RELEASE_POLICY.md b/docs/RELEASE_POLICY.md index fb95c55e402..fe2f881d6b6 100644 --- a/docs/RELEASE_POLICY.md +++ b/docs/RELEASE_POLICY.md @@ -90,7 +90,7 @@ v3.1 will be patched since were the last two Firecracker releases and less than | Release | Release Date | Latest Patch | Min. end of support | Official end of Support | | ------: | -----------: | -----------: | ------------------: | :------------------------------ | -| v1.10 | 2024-11-07 | v1.10.0 | 2025-05-07 | Supported | +| v1.10 | 2024-11-07 | v1.10.1 | 2025-05-07 | Supported | | v1.9 | 2024-09-02 | v1.9.1 | 2025-03-02 | Supported | | v1.8 | 2024-07-10 | v1.8.0 | 2025-01-10 | Supported | | v1.7 | 2024-03-18 | v1.7.0 | 2024-09-18 | 2024-09-18 (end of 6mo support) | From 133e5412e150ace0867118d4ead440ee9a4fabda Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Wed, 13 Nov 2024 11:07:12 +0000 Subject: [PATCH 07/15] snapshot: Remove max_connections and max_pending_resets fields `TcpIPv4Handler` for MMDS network stack preallocates several buffers whose sizes are saved into a snapshot as `max_connections` and `max_pending_resets` in `MmdsNetworkStackState`. But they are always the same constant hardcoded values (`DEFAULT_MAX_CONNECTIONS` and `DEFAULT_MAX_PENDING_RESETS`) as of today, which means there is no need to save them into a snapshot. Even if we change the hardcoded sizes across Firecracker versions, that should not be a problem. This is because the snapshot feature does not support migration of network connections and those buffers are initialized with empty on snapshot restoration. When migrating from a Firecracker version with larger buffers to another version with smaller ones, guest workloads that worked previously might start to fail due to the less buffer spaces. However, the issue is not a problem of the snapshot feature and it should also occur even on a purely booted microVM (not restored from a snapshot). Thus, it is fine to remove those fields from a snapshot. Since this is a breaking change of the snapshot format, bumps the major version. Signed-off-by: Takahiro Itazuri --- CHANGELOG.md | 4 ++++ src/vmm/src/mmds/ns.rs | 15 +++------------ src/vmm/src/mmds/persist.rs | 15 --------------- src/vmm/src/persist.rs | 2 +- 4 files changed, 8 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ca0688ce06..55c46a05276 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,10 @@ and this project adheres to ### Changed +- [#4913](https://github.com/firecracker-microvm/firecracker/pull/4913): Removed + unnecessary fields (`max_connections` and `max_pending_resets`) from the + snapshot format, bumping the snapshot version to 5.0.0. + ### Deprecated ### Removed diff --git a/src/vmm/src/mmds/ns.rs b/src/vmm/src/mmds/ns.rs index 09d73b21e99..d431d71cbcb 100644 --- a/src/vmm/src/mmds/ns.rs +++ b/src/vmm/src/mmds/ns.rs @@ -81,8 +81,6 @@ impl MmdsNetworkStack { mac_addr: MacAddr, ipv4_addr: Ipv4Addr, tcp_port: u16, - max_connections: NonZeroUsize, - max_pending_resets: NonZeroUsize, mmds: Arc>, ) -> Self { MmdsNetworkStack { @@ -93,8 +91,8 @@ impl MmdsNetworkStack { tcp_handler: TcpIPv4Handler::new( ipv4_addr, tcp_port, - max_connections, - max_pending_resets, + NonZeroUsize::new(DEFAULT_MAX_CONNECTIONS).unwrap(), + NonZeroUsize::new(DEFAULT_MAX_PENDING_RESETS).unwrap(), ), mmds, } @@ -105,14 +103,7 @@ impl MmdsNetworkStack { let ipv4_addr = mmds_ipv4_addr.unwrap_or_else(|| Ipv4Addr::from(DEFAULT_IPV4_ADDR)); // The unwrap()s are safe because the given literals are greater than 0. - Self::new( - mac_addr, - ipv4_addr, - DEFAULT_TCP_PORT, - NonZeroUsize::new(DEFAULT_MAX_CONNECTIONS).unwrap(), - NonZeroUsize::new(DEFAULT_MAX_PENDING_RESETS).unwrap(), - mmds, - ) + Self::new(mac_addr, ipv4_addr, DEFAULT_TCP_PORT, mmds) } pub fn set_ipv4_addr(&mut self, ipv4_addr: Ipv4Addr) { diff --git a/src/vmm/src/mmds/persist.rs b/src/vmm/src/mmds/persist.rs index dc0113f8a5c..82feff79bc8 100644 --- a/src/vmm/src/mmds/persist.rs +++ b/src/vmm/src/mmds/persist.rs @@ -4,7 +4,6 @@ //! Defines the structures needed for saving/restoring MmdsNetworkStack. use std::net::Ipv4Addr; -use std::num::NonZeroUsize; use std::sync::{Arc, Mutex}; use serde::{Deserialize, Serialize}; @@ -20,8 +19,6 @@ pub struct MmdsNetworkStackState { mac_addr: [u8; MAC_ADDR_LEN as usize], ipv4_addr: u32, tcp_port: u16, - max_connections: NonZeroUsize, - max_pending_resets: NonZeroUsize, } impl Persist<'_> for MmdsNetworkStack { @@ -37,8 +34,6 @@ impl Persist<'_> for MmdsNetworkStack { mac_addr, ipv4_addr: self.ipv4_addr.into(), tcp_port: self.tcp_handler.local_port(), - max_connections: self.tcp_handler.max_connections(), - max_pending_resets: self.tcp_handler.max_pending_resets(), } } @@ -50,8 +45,6 @@ impl Persist<'_> for MmdsNetworkStack { MacAddr::from_bytes_unchecked(&state.mac_addr), Ipv4Addr::from(state.ipv4_addr), state.tcp_port, - state.max_connections, - state.max_pending_resets, mmds, )) } @@ -83,13 +76,5 @@ mod tests { restored_ns.tcp_handler.local_port(), ns.tcp_handler.local_port() ); - assert_eq!( - restored_ns.tcp_handler.max_connections(), - ns.tcp_handler.max_connections() - ); - assert_eq!( - restored_ns.tcp_handler.max_pending_resets(), - ns.tcp_handler.max_pending_resets() - ); } } diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 16d7ed72537..5b01ed49c75 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -157,7 +157,7 @@ pub enum CreateSnapshotError { } /// Snapshot version -pub const SNAPSHOT_VERSION: Version = Version::new(4, 0, 0); +pub const SNAPSHOT_VERSION: Version = Version::new(5, 0, 0); /// Creates a Microvm snapshot. pub fn create_snapshot( From e3e229b820f5aba3b2d53defdc5100cf7a316852 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Wed, 13 Nov 2024 10:46:22 +0000 Subject: [PATCH 08/15] test(mmds): Do not use MmdsNetworkStack::new() in tests There is no need to use MmdsNetworkStack::new() instead of MmdsNetworkStack::new_with_defaults() in tests that pass the same default values. Signed-off-by: Takahiro Itazuri --- src/vmm/src/mmds/ns.rs | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/src/vmm/src/mmds/ns.rs b/src/vmm/src/mmds/ns.rs index d431d71cbcb..8075df8cb91 100644 --- a/src/vmm/src/mmds/ns.rs +++ b/src/vmm/src/mmds/ns.rs @@ -553,14 +553,8 @@ mod tests { let ip = Ipv4Addr::from(DEFAULT_IPV4_ADDR); let other_ip = Ipv4Addr::new(5, 6, 7, 8); let mac = MacAddr::from_bytes_unchecked(&[0; 6]); - let mut ns = MmdsNetworkStack::new( - mac, - ip, - DEFAULT_TCP_PORT, - NonZeroUsize::new(DEFAULT_MAX_CONNECTIONS).unwrap(), - NonZeroUsize::new(DEFAULT_MAX_PENDING_RESETS).unwrap(), - Arc::new(Mutex::new(Mmds::default())), - ); + let mut ns = + MmdsNetworkStack::new_with_defaults(Some(ip), Arc::new(Mutex::new(Mmds::default()))); let mut eth = EthernetFrame::write_incomplete(buf.as_mut(), mac, mac, ETHERTYPE_ARP).unwrap(); @@ -580,14 +574,8 @@ mod tests { let ip = Ipv4Addr::from(DEFAULT_IPV4_ADDR); let other_ip = Ipv4Addr::new(5, 6, 7, 8); let mac = MacAddr::from_bytes_unchecked(&[0; 6]); - let ns = MmdsNetworkStack::new( - mac, - ip, - DEFAULT_TCP_PORT, - NonZeroUsize::new(DEFAULT_MAX_CONNECTIONS).unwrap(), - NonZeroUsize::new(DEFAULT_MAX_PENDING_RESETS).unwrap(), - Arc::new(Mutex::new(Mmds::default())), - ); + let ns = + MmdsNetworkStack::new_with_defaults(Some(ip), Arc::new(Mutex::new(Mmds::default()))); let mut eth = EthernetFrame::write_incomplete(buf.as_mut(), mac, mac, ETHERTYPE_IPV4).unwrap(); @@ -606,14 +594,8 @@ mod tests { let ip = Ipv4Addr::from(DEFAULT_IPV4_ADDR); let other_ip = Ipv4Addr::new(5, 6, 7, 8); let mac = MacAddr::from_bytes_unchecked(&[0; 6]); - let mut ns = MmdsNetworkStack::new( - mac, - ip, - DEFAULT_TCP_PORT, - NonZeroUsize::new(DEFAULT_MAX_CONNECTIONS).unwrap(), - NonZeroUsize::new(DEFAULT_MAX_PENDING_RESETS).unwrap(), - Arc::new(Mutex::new(Mmds::default())), - ); + let mut ns = + MmdsNetworkStack::new_with_defaults(Some(ip), Arc::new(Mutex::new(Mmds::default()))); // try IPv4 with detour_arp let mut eth = From effeb7bc74ae3cec0115111f4125442b43836613 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 14 Nov 2024 10:45:13 +0000 Subject: [PATCH 09/15] chore: Clarify user action We bumped the snapshot version up twice recently, requiring users to regenerate their snapshot, but the user action isn't clearly stated. Signed-off-by: Takahiro Itazuri --- CHANGELOG.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 55c46a05276..7da89123eab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,8 @@ and this project adheres to - [#4913](https://github.com/firecracker-microvm/firecracker/pull/4913): Removed unnecessary fields (`max_connections` and `max_pending_resets`) from the - snapshot format, bumping the snapshot version to 5.0.0. + snapshot format, bumping the snapshot version to 5.0.0. Users need to + regenerate snapshots. ### Deprecated @@ -26,8 +27,8 @@ and this project adheres to ### Changed -- [#4907](https://github.com/firecracker-microvm/firecracker/pull/4907): Bump - snapshot version to 4.0.0. +- [#4907](https://github.com/firecracker-microvm/firecracker/pull/4907): Bumped + the snapshot version to 4.0.0, so users need to regenerate snapshots. ## \[1.10.0\] From 1e3fc10e023756da0da24e780726c3c1ed4aa62a Mon Sep 17 00:00:00 2001 From: tommady Date: Fri, 15 Nov 2024 22:02:20 +0000 Subject: [PATCH 10/15] refactor(builder): Refactor vmm builder code to simplify logic eliminate the unnecessary usage of the event_manager argument and fix up aarch64 attach_legacy_devices_aarch64 fn Signed-off-by: tommady --- src/vmm/src/builder.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 4a3eb48e0fc..d0ea317df6a 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -168,7 +168,9 @@ pub mod aarch64 { if cmdline_contains_console { // Make stdout non-blocking. set_stdout_nonblocking(); - let serial = setup_serial_device(event_manager, std::io::stdin(), std::io::stdout())?; + let serial = setup_serial_device(std::io::stdin(), std::io::stdout())?; + event_manager.add_subscriber(serial.clone()); + vmm.mmio_device_manager .register_mmio_serial(vmm.vm.fd(), &mut vmm.resource_allocator, serial, None) .map_err(VmmError::RegisterMMIODevice)?; From ef6e55a6cd8a9843867bdc6b20543fb290c6370d Mon Sep 17 00:00:00 2001 From: tommady Date: Fri, 22 Nov 2024 03:35:40 +0000 Subject: [PATCH 11/15] refactor(builder): address comments remove the aarch64 suffix from the attach_legacy_devices_aarch64 function and ensure that aarch64 smt is always set to false int the configure_system_for_boot function Signed-off-by: tommady --- src/vmm/src/builder.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index d0ea317df6a..51baf54b5b2 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -152,7 +152,7 @@ impl std::convert::From for StartMicrovmError { pub mod aarch64 { use super::*; - fn attach_legacy_devices_aarch64( + fn attach_legacy_devices( event_manager: &mut EventManager, vmm: &mut Vmm, cmdline: &mut LoaderKernelCmdline, @@ -222,7 +222,8 @@ pub mod aarch64 { let vcpu_config = VcpuConfig { vcpu_count: vm_config.vcpu_count, - smt: vm_config.smt, + // smt does not exist on aarch64 + smt: false, cpu_config, }; @@ -642,8 +643,7 @@ pub fn build_microvm_for_boot( } #[cfg(target_arch = "aarch64")] - aarch::attach_legacy_devices_aarch64(event_manager, &mut vmm, &mut boot_cmdline) - .map_err(Internal)?; + aarch::attach_legacy_devices(event_manager, &mut vmm, &mut boot_cmdline).map_err(Internal)?; attach_vmgenid_device(&mut vmm)?; From 434435097c6ffeabf968bea7dcb31a2807563571 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Wed, 13 Nov 2024 11:07:12 +0000 Subject: [PATCH 12/15] snapshot: Remove max_connections and max_pending_resets fields `TcpIPv4Handler` for MMDS network stack preallocates several buffers whose sizes are saved into a snapshot as `max_connections` and `max_pending_resets` in `MmdsNetworkStackState`. But they are always the same constant hardcoded values (`DEFAULT_MAX_CONNECTIONS` and `DEFAULT_MAX_PENDING_RESETS`) as of today, which means there is no need to save them into a snapshot. Even if we change the hardcoded sizes across Firecracker versions, that should not be a problem. This is because the snapshot feature does not support migration of network connections and those buffers are initialized with empty on snapshot restoration. When migrating from a Firecracker version with larger buffers to another version with smaller ones, guest workloads that worked previously might start to fail due to the less buffer spaces. However, the issue is not a problem of the snapshot feature and it should also occur even on a purely booted microVM (not restored from a snapshot). Thus, it is fine to remove those fields from a snapshot. Since this is a breaking change of the snapshot format, bumps the major version. Signed-off-by: Takahiro Itazuri --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e7be8f518f..36111becb4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,8 +14,8 @@ and this project adheres to - [#4913](https://github.com/firecracker-microvm/firecracker/pull/4913): Removed unnecessary fields (`max_connections` and `max_pending_resets`) from the - snapshot format, bumping the snapshot version to 5.0.0. Users need to - regenerate snapshots. + + snapshot format, bumping the snapshot version to 5.0.0. ### Deprecated From 0edb749428c657c4be97bd64d96933c9662bf2b0 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 14 Nov 2024 10:45:13 +0000 Subject: [PATCH 13/15] chore: Clarify user action We bumped the snapshot version up twice recently, requiring users to regenerate their snapshot, but the user action isn't clearly stated. Signed-off-by: Takahiro Itazuri --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36111becb4e..52b5fb739d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,8 @@ and this project adheres to - [#4913](https://github.com/firecracker-microvm/firecracker/pull/4913): Removed unnecessary fields (`max_connections` and `max_pending_resets`) from the - snapshot format, bumping the snapshot version to 5.0.0. + snapshot format, bumping the snapshot version to 5.0.0. Users need to + regenerate snapshots. ### Deprecated From b336e832ebcfa90b9c82f491fc8e8fb7fa5c268b Mon Sep 17 00:00:00 2001 From: tommady Date: Sat, 23 Nov 2024 07:24:24 +0000 Subject: [PATCH 14/15] refactor(builder): address comments let the x86_64 and aarch64 architectures code can compile and without warnning Signed-off-by: tommady --- CHANGELOG.md | 1 - src/vmm/src/builder.rs | 25 +++++++++++++------------ src/vmm/src/device_manager/persist.rs | 8 +++----- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52b5fb739d7..6e7be8f518f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,6 @@ and this project adheres to - [#4913](https://github.com/firecracker-microvm/firecracker/pull/4913): Removed unnecessary fields (`max_connections` and `max_pending_resets`) from the - snapshot format, bumping the snapshot version to 5.0.0. Users need to regenerate snapshots. diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 51baf54b5b2..54541efcb45 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -148,11 +148,12 @@ impl std::convert::From for StartMicrovmError { } } +/// This module is dedicated to code tailored specifically for the aarch64 architecture #[cfg(target_arch = "aarch64")] pub mod aarch64 { use super::*; - fn attach_legacy_devices( + pub(crate) fn attach_legacy_devices( event_manager: &mut EventManager, vmm: &mut Vmm, cmdline: &mut LoaderKernelCmdline, @@ -166,8 +167,6 @@ pub mod aarch64 { .contains("console="); if cmdline_contains_console { - // Make stdout non-blocking. - set_stdout_nonblocking(); let serial = setup_serial_device(std::io::stdin(), std::io::stdout())?; event_manager.add_subscriber(serial.clone()); @@ -280,7 +279,7 @@ pub mod aarch64 { Ok(vcpus) } - pub fn load_kernel( + pub(crate) fn load_kernel( boot_config: &BootConfig, guest_memory: &GuestMemoryMmap, ) -> Result { @@ -300,7 +299,7 @@ pub mod aarch64 { Ok(entry_addr.kernel_load) } - pub fn create_vmm_and_vcpus( + pub(crate) fn create_vmm_and_vcpus( instance_info: &InstanceInfo, guest_memory: GuestMemoryMmap, uffd: Option, @@ -315,13 +314,14 @@ pub mod aarch64 { kvm_capabilities, )?; - let vcpus = - create_vcpus(&mut vm, vm_config.vcpu_count, &vmm.vcpus_exit_evt).map_err(Internal)?; + let vcpus = create_vcpus(&mut vmm.vm, vm_config.vcpu_count, &vmm.vcpus_exit_evt) + .map_err(StartMicrovmError::Internal)?; Ok((vmm, vcpus)) } } +/// This module is dedicated to code tailored specifically for the x86_64 architecture #[cfg(target_arch = "x86_64")] pub mod x86_64 { use super::*; @@ -422,13 +422,11 @@ pub mod x86_64 { let vcpu = Vcpu::new(cpu_idx, vm, exit_evt).map_err(VmmError::VcpuCreate)?; vcpus.push(vcpu); } - // Make stdout non blocking. - set_stdout_nonblocking(); Ok(vcpus) } - pub fn load_kernel( + pub(crate) fn load_kernel( boot_config: &BootConfig, guest_memory: &GuestMemoryMmap, ) -> Result { @@ -448,7 +446,7 @@ pub mod x86_64 { Ok(entry_addr.kernel_load) } - pub fn create_vmm_and_vcpus( + pub(crate) fn create_vmm_and_vcpus( instance_info: &InstanceInfo, guest_memory: GuestMemoryMmap, uffd: Option, @@ -643,7 +641,7 @@ pub fn build_microvm_for_boot( } #[cfg(target_arch = "aarch64")] - aarch::attach_legacy_devices(event_manager, &mut vmm, &mut boot_cmdline).map_err(Internal)?; + aarch64::attach_legacy_devices(event_manager, &mut vmm, &mut boot_cmdline).map_err(Internal)?; attach_vmgenid_device(&mut vmm)?; @@ -983,6 +981,9 @@ pub fn setup_serial_device( input: Some(input), }))); + // Make stdout non-blocking. + set_stdout_nonblocking(); + Ok(serial) } diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index 5773fa0ba09..9400de57d7e 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -418,13 +418,11 @@ impl<'a> Persist<'a> for MMIODeviceManager { #[cfg(target_arch = "aarch64")] { + use crate::builder::setup_serial_device; + for state in &state.legacy_devices { if state.type_ == DeviceType::Serial { - let serial = crate::builder::setup_serial_device( - constructor_args.event_manager, - std::io::stdin(), - std::io::stdout(), - )?; + let serial = setup_serial_device(std::io::stdin(), std::io::stdout())?; constructor_args .resource_allocator From 34e993b6c9cd51553641c17b9002f547fae402e5 Mon Sep 17 00:00:00 2001 From: tommady Date: Wed, 27 Nov 2024 04:39:30 +0000 Subject: [PATCH 15/15] refactor(builder): fix unitest make the test all passed Signed-off-by: tommady --- src/vmm/src/builder.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 82c735aee12..a8618b600b2 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -414,8 +414,6 @@ pub mod x86_64 { vcpu_count: u8, exit_evt: &EventFd, ) -> Result, VmmError> { - setup_interrupt_controller(vm)?; - let mut vcpus = Vec::with_capacity(vcpu_count as usize); for cpu_idx in 0..vcpu_count { let exit_evt = exit_evt.try_clone().map_err(VmmError::EventFd)?; @@ -479,12 +477,12 @@ fn build_vmm( // Set up Kvm Vm and register memory regions. // Build custom CPU config if a custom template is provided. - let vm = Vm::new(kvm_capabilities) + let mut vm = Vm::new(kvm_capabilities) .map_err(VmmError::Vm) - .map_err(StartMicrovmError::Internal)?; + .map_err(Internal)?; vm.memory_init(&guest_memory, vm_config.track_dirty_pages) .map_err(VmmError::Vm) - .map_err(StartMicrovmError::Internal)?; + .map_err(Internal)?; let vcpus_exit_evt = EventFd::new(libc::EFD_NONBLOCK) .map_err(VmmError::EventFd) @@ -512,6 +510,8 @@ fn build_vmm( .map_err(VmmError::EventFd) .map_err(Internal)?; + x86_64::setup_interrupt_controller(&mut vm).map_err(Internal)?; + // create pio dev manager with legacy devices let pio_device_manager = { // TODO Remove these unwraps.