mirror of
				https://github.com/momo5502/hypervisor.git
				synced 2025-10-26 08:15:55 +00:00 
			
		
		
		
	More cleanup
This commit is contained in:
		| @@ -33,43 +33,25 @@ namespace vmx | ||||
|  | ||||
| 		void initialize_mtrr(mtrr_list& mtrr_data) | ||||
| 		{ | ||||
| 			// | ||||
| 			// Read the capabilities mask | ||||
| 			// | ||||
| 			ia32_mtrr_capabilities_register mtrr_capabilities{}; | ||||
| 			mtrr_capabilities.flags = __readmsr(IA32_MTRR_CAPABILITIES); | ||||
|  | ||||
| 			// | ||||
| 			// Iterate over each variable MTRR | ||||
| 			// | ||||
| 			for (auto i = 0u; i < mtrr_capabilities.variable_range_count; i++) | ||||
| 			{ | ||||
| 				// | ||||
| 				// Capture the value | ||||
| 				// | ||||
| 				ia32_mtrr_physbase_register mtrr_base{}; | ||||
| 				ia32_mtrr_physmask_register mtrr_mask{}; | ||||
|  | ||||
| 				mtrr_base.flags = __readmsr(IA32_MTRR_PHYSBASE0 + i * 2); | ||||
| 				mtrr_mask.flags = __readmsr(IA32_MTRR_PHYSMASK0 + i * 2); | ||||
|  | ||||
| 				// | ||||
| 				// Check if the MTRR is enabled | ||||
| 				// | ||||
| 				mtrr_data[i].type = static_cast<uint32_t>(mtrr_base.type); | ||||
| 				mtrr_data[i].enabled = static_cast<uint32_t>(mtrr_mask.valid); | ||||
| 				if (mtrr_data[i].enabled != FALSE) | ||||
| 				{ | ||||
| 					// | ||||
| 					// Set the base | ||||
| 					// | ||||
| 					mtrr_data[i].physical_address_min = mtrr_base.page_frame_number * | ||||
| 						MTRR_PAGE_SIZE; | ||||
|  | ||||
| 					// | ||||
| 					// Compute the length | ||||
| 					// | ||||
| 					unsigned long bit; | ||||
| 					unsigned long bit{}; | ||||
| 					_BitScanForward64(&bit, mtrr_mask.page_frame_number * MTRR_PAGE_SIZE); | ||||
| 					mtrr_data[i].physical_address_max = mtrr_data[i]. | ||||
| 						physical_address_min + | ||||
|   | ||||
| @@ -239,42 +239,30 @@ bool hypervisor::try_enable_core(const uint64_t system_directory_table_base) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| bool enter_root_mode_on_cpu(vmx::state& vm_state) | ||||
| void enter_root_mode_on_cpu(vmx::state& vm_state) | ||||
| { | ||||
| 	auto* launch_context = &vm_state.launch_context; | ||||
| 	auto* registers = &launch_context->special_registers; | ||||
|  | ||||
| 	// | ||||
| 	// Ensure the VMCS can fit into a single page | ||||
| 	// | ||||
| 	ia32_vmx_basic_register basic_register{}; | ||||
| 	memset(&basic_register, 0, sizeof(basic_register)); | ||||
|  | ||||
| 	basic_register.flags = launch_context->msr_data[0].QuadPart; | ||||
| 	if (basic_register.vmcs_size_in_bytes > static_cast<uint64_t>(PAGE_SIZE)) | ||||
| 	{ | ||||
| 		return false; | ||||
| 	  throw std::runtime_error("VMCS exceeds page size"); | ||||
| 	} | ||||
|  | ||||
| 	// | ||||
| 	// Ensure that the VMCS is supported in writeback memory | ||||
| 	// | ||||
| 	if (basic_register.memory_type != static_cast<uint64_t>(MEMORY_TYPE_WRITE_BACK)) | ||||
| 	{ | ||||
| 		return false; | ||||
| 	  throw std::runtime_error("VMCS memory type must be write-back"); | ||||
| 	} | ||||
|  | ||||
| 	// | ||||
| 	// Ensure that true MSRs can be used for capabilities | ||||
| 	// | ||||
| 	if (basic_register.must_be_zero) | ||||
| 	{ | ||||
| 		return false; | ||||
| 	  throw std::runtime_error("Must-be-zero bit is not zero :O"); | ||||
| 	} | ||||
|  | ||||
| 	// | ||||
| 	// Ensure that EPT is available with the needed features SimpleVisor uses | ||||
| 	// | ||||
| 	ia32_vmx_ept_vpid_cap_register ept_vpid_cap_register{}; | ||||
| 	ept_vpid_cap_register.flags = launch_context->msr_data[12].QuadPart; | ||||
|  | ||||
| @@ -282,75 +270,46 @@ bool enter_root_mode_on_cpu(vmx::state& vm_state) | ||||
| 		ept_vpid_cap_register.memory_type_write_back && | ||||
| 		ept_vpid_cap_register.pde_2mb_pages) | ||||
| 	{ | ||||
| 		// | ||||
| 		// Enable EPT if these features are supported | ||||
| 		// | ||||
| 		launch_context->ept_controls.flags = 0; | ||||
| 		launch_context->ept_controls.enable_ept = 1; | ||||
| 		launch_context->ept_controls.enable_vpid = 1; | ||||
| 	} | ||||
|  | ||||
| 	// | ||||
| 	// Capture the revision ID for the VMXON and VMCS region | ||||
| 	// | ||||
| 	vm_state.vmx_on.revision_id = launch_context->msr_data[0].LowPart; | ||||
| 	vm_state.vmcs.revision_id = launch_context->msr_data[0].LowPart; | ||||
|  | ||||
| 	// | ||||
| 	// Store the physical addresses of all per-LP structures allocated | ||||
| 	// | ||||
| 	launch_context->vmx_on_physical_address = memory::get_physical_address(&vm_state.vmx_on); | ||||
| 	launch_context->vmcs_physical_address = memory::get_physical_address(&vm_state.vmcs); | ||||
| 	launch_context->msr_bitmap_physical_address = memory::get_physical_address(vm_state.msr_bitmap); | ||||
|  | ||||
| 	// | ||||
| 	// Update CR0 with the must-be-zero and must-be-one requirements | ||||
| 	// | ||||
| 	registers->cr0 &= launch_context->msr_data[7].LowPart; | ||||
| 	registers->cr0 |= launch_context->msr_data[6].LowPart; | ||||
|  | ||||
| 	// | ||||
| 	// Do the same for CR4 | ||||
| 	// | ||||
| 	registers->cr4 &= launch_context->msr_data[9].LowPart; | ||||
| 	registers->cr4 |= launch_context->msr_data[8].LowPart; | ||||
|  | ||||
| 	// | ||||
| 	// Update host CR0 and CR4 based on the requirements above | ||||
| 	// | ||||
| 	__writecr0(registers->cr0); | ||||
| 	__writecr4(registers->cr4); | ||||
|  | ||||
| 	// | ||||
| 	// Enable VMX Root Mode | ||||
| 	// | ||||
| 	if (__vmx_on(&launch_context->vmx_on_physical_address)) | ||||
| 	{ | ||||
| 		return false; | ||||
| 	  throw std::runtime_error("Failed to execute vmx_on"); | ||||
| 	} | ||||
|  | ||||
| 	// | ||||
| 	// Clear the state of the VMCS, setting it to Inactive | ||||
| 	// | ||||
| 	auto destructor = utils::finally(__vmx_off); | ||||
|  | ||||
| 	if (__vmx_vmclear(&launch_context->vmcs_physical_address)) | ||||
| 	{ | ||||
| 		__vmx_off(); | ||||
| 		return false; | ||||
| 		 | ||||
| 		throw std::runtime_error("Failed to clear vmcs"); | ||||
| 	} | ||||
|  | ||||
| 	// | ||||
| 	// Load the VMCS, setting its state to Active | ||||
| 	// | ||||
| 	if (__vmx_vmptrld(&launch_context->vmcs_physical_address)) | ||||
| 	{ | ||||
| 		__vmx_off(); | ||||
| 		return false; | ||||
| 	  throw std::runtime_error("Failed to load vmcs"); | ||||
| 	} | ||||
|  | ||||
| 	// | ||||
| 	// VMX Root Mode is enabled, with an active VMCS. | ||||
| 	// | ||||
| 	return true; | ||||
| 	destructor.cancel(); | ||||
| } | ||||
|  | ||||
| vmx::gdt_entry convert_gdt_entry(const uint64_t gdt_base, const uint16_t selector_value) | ||||
| @@ -361,9 +320,6 @@ vmx::gdt_entry convert_gdt_entry(const uint64_t gdt_base, const uint16_t selecto | ||||
| 	segment_selector selector{}; | ||||
| 	selector.flags = selector_value; | ||||
|  | ||||
| 	// | ||||
| 	// Reject LDT or NULL entries | ||||
| 	// | ||||
| 	if (selector.flags == 0 || selector.table) | ||||
| 	{ | ||||
| 		result.limit = 0; | ||||
| @@ -374,31 +330,11 @@ vmx::gdt_entry convert_gdt_entry(const uint64_t gdt_base, const uint16_t selecto | ||||
| 		return result; | ||||
| 	} | ||||
|  | ||||
| 	// | ||||
| 	// Read the GDT entry at the given selector, masking out the RPL bits. | ||||
| 	// | ||||
| 	const auto* gdt_entry = reinterpret_cast<segment_descriptor_64*>(gdt_base + static_cast<uint64_t>(selector.index) * | ||||
| 		8); | ||||
| 	const auto* gdt_entry = reinterpret_cast<segment_descriptor_64*>(gdt_base + static_cast<uint64_t>(selector.index) * 8); | ||||
|  | ||||
| 	// | ||||
| 	// Write the selector directly  | ||||
| 	// | ||||
| 	result.selector = selector; | ||||
|  | ||||
| 	// | ||||
| 	// Use the LSL intrinsic to read the segment limit | ||||
| 	// | ||||
| 	result.limit = __segmentlimit(selector.flags); | ||||
|  | ||||
| 	// | ||||
| 	// Build the full 64-bit effective address, keeping in mind that only when | ||||
| 	// the System bit is unset, should this be done. | ||||
| 	// | ||||
| 	// NOTE: The Windows definition of KGDTENTRY64 is WRONG. The "System" field | ||||
| 	// is incorrectly defined at the position of where the AVL bit should be. | ||||
| 	// The actual location of the SYSTEM bit is encoded as the highest bit in | ||||
| 	// the "Type" field. | ||||
| 	// | ||||
| 	result.base = 0; | ||||
| 	result.base |= static_cast<uint64_t>(gdt_entry->base_address_low); | ||||
| 	result.base |= static_cast<uint64_t>(gdt_entry->base_address_middle) << 16; | ||||
| @@ -408,9 +344,6 @@ vmx::gdt_entry convert_gdt_entry(const uint64_t gdt_base, const uint16_t selecto | ||||
| 		result.base |= static_cast<uint64_t>(gdt_entry->base_address_upper) << 32; | ||||
| 	} | ||||
|  | ||||
| 	// | ||||
| 	// Load the access rights | ||||
| 	// | ||||
| 	result.access_rights.flags = 0; | ||||
|  | ||||
| 	result.access_rights.type = gdt_entry->type; | ||||
| @@ -423,9 +356,6 @@ vmx::gdt_entry convert_gdt_entry(const uint64_t gdt_base, const uint16_t selecto | ||||
| 	result.access_rights.default_big = gdt_entry->default_big; | ||||
| 	result.access_rights.granularity = gdt_entry->granularity; | ||||
|  | ||||
| 	// | ||||
| 	// Finally, handle the VMX-specific bits | ||||
| 	// | ||||
| 	result.access_rights.reserved1 = 0; | ||||
| 	result.access_rights.unusable = !gdt_entry->present; | ||||
|  | ||||
| @@ -434,11 +364,6 @@ vmx::gdt_entry convert_gdt_entry(const uint64_t gdt_base, const uint16_t selecto | ||||
|  | ||||
| uint32_t adjust_msr(const ULARGE_INTEGER control_value, const uint64_t desired_value) | ||||
| { | ||||
| 	// | ||||
| 	// VMX feature/capability MSRs encode the "must be 0" bits in the high word | ||||
| 	// of their value, and the "must be 1" bits in the low word of their value. | ||||
| 	// Adjust any requested capability/feature based on these requirements. | ||||
| 	// | ||||
| 	auto result = static_cast<uint32_t>(desired_value); | ||||
| 	result &= control_value.HighPart; | ||||
| 	result |= control_value.LowPart; | ||||
| @@ -447,13 +372,6 @@ uint32_t adjust_msr(const ULARGE_INTEGER control_value, const uint64_t desired_v | ||||
|  | ||||
| void vmx_handle_invd() | ||||
| { | ||||
| 	// | ||||
| 	// This is the handler for the INVD instruction. Technically it may be more | ||||
| 	// correct to use __invd instead of __wbinvd, but that intrinsic doesn't | ||||
| 	// actually exist. Additionally, the Windows kernel (or HAL) doesn't contain | ||||
| 	// any example of INVD actually ever being used. Finally, Hyper-V itself | ||||
| 	// handles INVD by issuing WBINVD as well, so we'll just do that here too. | ||||
| 	// | ||||
| 	__wbinvd(); | ||||
| } | ||||
|  | ||||
| @@ -464,12 +382,6 @@ void vmx_handle_cpuid(vmx::guest_context& guest_context) | ||||
| { | ||||
| 	INT32 cpu_info[4]; | ||||
|  | ||||
| 	// | ||||
| 	// Check for the magic CPUID sequence, and check that it is coming from | ||||
| 	// Ring 0. Technically we could also check the RIP and see if this falls | ||||
| 	// in the expected function, but we may want to allow a separate "unload" | ||||
| 	// driver or code at some point. | ||||
| 	// | ||||
| 	if ((guest_context.vp_regs->Rax == 0x41414141) && | ||||
| 		(guest_context.vp_regs->Rcx == 0x42424242) && | ||||
| 		((read_vmx(VMCS_GUEST_CS_SELECTOR) & SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK) == DPL_SYSTEM)) | ||||
| @@ -478,35 +390,18 @@ void vmx_handle_cpuid(vmx::guest_context& guest_context) | ||||
| 		return; | ||||
| 	} | ||||
|  | ||||
| 	// | ||||
| 	// Otherwise, issue the CPUID to the logical processor based on the indexes | ||||
| 	// on the VP's GPRs. | ||||
| 	// | ||||
| 	__cpuidex(cpu_info, static_cast<int32_t>(guest_context.vp_regs->Rax), | ||||
| 	          static_cast<int32_t>(guest_context.vp_regs->Rcx)); | ||||
|  | ||||
| 	// | ||||
| 	// Check if this was CPUID 1h, which is the features request. | ||||
| 	// | ||||
| 	if (guest_context.vp_regs->Rax == 1) | ||||
| 	{ | ||||
| 		// | ||||
| 		// Set the Hypervisor Present-bit in RCX, which Intel and AMD have both | ||||
| 		// reserved for this indication. | ||||
| 		// | ||||
| 		cpu_info[2] |= HYPERV_HYPERVISOR_PRESENT_BIT; | ||||
| 	} | ||||
| 	else if (guest_context.vp_regs->Rax == HYPERV_CPUID_INTERFACE) | ||||
| 	{ | ||||
| 		// | ||||
| 		// Return our interface identifier | ||||
| 		// | ||||
| 		cpu_info[0] = 'momo'; | ||||
| 	} | ||||
|  | ||||
| 	// | ||||
| 	// Copy the values from the logical processor registers into the VP GPRs. | ||||
| 	// | ||||
| 	guest_context.vp_regs->Rax = cpu_info[0]; | ||||
| 	guest_context.vp_regs->Rbx = cpu_info[1]; | ||||
| 	guest_context.vp_regs->Rcx = cpu_info[2]; | ||||
| @@ -515,36 +410,18 @@ void vmx_handle_cpuid(vmx::guest_context& guest_context) | ||||
|  | ||||
| void vmx_handle_xsetbv(const vmx::guest_context& guest_context) | ||||
| { | ||||
| 	// | ||||
| 	// Simply issue the XSETBV instruction on the native logical processor. | ||||
| 	// | ||||
|  | ||||
| 	_xsetbv(static_cast<uint32_t>(guest_context.vp_regs->Rcx), | ||||
| 	        guest_context.vp_regs->Rdx << 32 | guest_context.vp_regs->Rax); | ||||
| } | ||||
|  | ||||
| void vmx_handle_vmx(vmx::guest_context& guest_context) | ||||
| { | ||||
| 	// | ||||
| 	// Set the CF flag, which is how VMX instructions indicate failure | ||||
| 	// | ||||
| 	guest_context.guest_e_flags |= 0x1; // VM_FAIL_INVALID | ||||
|  | ||||
| 	// | ||||
| 	// RFLAGS is actually restored from the VMCS, so update it here | ||||
| 	// | ||||
| 	__vmx_vmwrite(VMCS_GUEST_RFLAGS, guest_context.guest_e_flags); | ||||
| } | ||||
|  | ||||
| void vmx_dispatch_vm_exit(vmx::guest_context& guest_context, const vmx::state& vm_state) | ||||
| { | ||||
| 	// | ||||
| 	// This is the generic VM-Exit handler. Decode the reason for the exit and | ||||
| 	// call the appropriate handler. As per Intel specifications, given that we | ||||
| 	// have requested no optional exits whatsoever, we should only see CPUID, | ||||
| 	// INVD, XSETBV and other VMX instructions. GETSEC cannot happen as we do | ||||
| 	// not run in SMX context. | ||||
| 	// | ||||
| 	switch (guest_context.exit_reason) | ||||
| 	{ | ||||
| 	case VMX_EXIT_REASON_EXECUTE_CPUID: | ||||
| @@ -580,11 +457,6 @@ void vmx_dispatch_vm_exit(vmx::guest_context& guest_context, const vmx::state& v | ||||
| 		break; | ||||
| 	} | ||||
|  | ||||
| 	// | ||||
| 	// Move the instruction pointer to the next instruction after the one that | ||||
| 	// caused the exit. Since we are not doing any special handling or changing | ||||
| 	// of execution, this can be done for any exit reason. | ||||
| 	// | ||||
| 	if (guest_context.increment_rip) | ||||
| 	{ | ||||
| 		guest_context.guest_rip += read_vmx(VMCS_VMEXIT_INSTRUCTION_LENGTH); | ||||
| @@ -596,12 +468,6 @@ extern "C" [[ noreturn ]] void vm_exit_handler(CONTEXT* context) | ||||
| { | ||||
| 	auto* vm_state = resolve_vm_state_from_context(*context); | ||||
|  | ||||
| 	// | ||||
| 	// Build a little stack context to make it easier to keep track of certain | ||||
| 	// guest state, such as the RIP/RSP/RFLAGS, and the exit reason. The rest | ||||
| 	// of the general purpose registers come from the context structure that we | ||||
| 	// captured on our own with RtlCaptureContext in the assembly entrypoint. | ||||
| 	// | ||||
| 	vmx::guest_context guest_context{}; | ||||
| 	guest_context.guest_e_flags = read_vmx(VMCS_GUEST_RFLAGS); | ||||
| 	guest_context.guest_rip = read_vmx(VMCS_GUEST_RIP); | ||||
| @@ -613,68 +479,26 @@ extern "C" [[ noreturn ]] void vm_exit_handler(CONTEXT* context) | ||||
| 	guest_context.exit_vm = false; | ||||
| 	guest_context.increment_rip = true; | ||||
|  | ||||
| 	// | ||||
| 	// Call the generic handler | ||||
| 	// | ||||
| 	vmx_dispatch_vm_exit(guest_context, *vm_state); | ||||
|  | ||||
| 	// | ||||
| 	// Did we hit the magic exit sequence, or should we resume back to the VM | ||||
| 	// context? | ||||
| 	// | ||||
| 	if (guest_context.exit_vm) | ||||
| 	{ | ||||
| 		context->Rcx = 0x43434343; | ||||
|  | ||||
| 		// | ||||
| 		// Perform any OS-specific CPU uninitialization work | ||||
| 		// | ||||
| 		restore_descriptor_tables(vm_state->launch_context); | ||||
|  | ||||
| 		// | ||||
| 		// Our callback routine may have interrupted an arbitrary user process, | ||||
| 		// and therefore not a thread running with a systemwide page directory. | ||||
| 		// Therefore if we return back to the original caller after turning off | ||||
| 		// VMX, it will keep our current "host" CR3 value which we set on entry | ||||
| 		// to the PML4 of the SYSTEM process. We want to return back with the | ||||
| 		// correct value of the "guest" CR3, so that the currently executing | ||||
| 		// process continues to run with its expected address space mappings. | ||||
| 		// | ||||
| 		__writecr3(read_vmx(VMCS_GUEST_CR3)); | ||||
|  | ||||
| 		// | ||||
| 		// Finally, restore the stack, instruction pointer and EFLAGS to the | ||||
| 		// original values present when the instruction causing our VM-Exit | ||||
| 		// executed (such as ShvVpUninitialize). This will effectively act as | ||||
| 		// a longjmp back to that location. | ||||
| 		// | ||||
| 		context->Rsp = guest_context.guest_rsp; | ||||
| 		context->Rip = guest_context.guest_rip; | ||||
| 		context->EFlags = static_cast<uint32_t>(guest_context.guest_e_flags); | ||||
|  | ||||
| 		// | ||||
| 		// Turn off VMX root mode on this logical processor. We're done here. | ||||
| 		// | ||||
| 		__vmx_off(); | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| 		// | ||||
| 		// Return into a VMXRESUME intrinsic, which we broke out as its own | ||||
| 		// function, in order to allow this to work. No assembly code will be | ||||
| 		// needed as RtlRestoreContext will fix all the GPRs, and what we just | ||||
| 		// did to RSP will take care of the rest. | ||||
| 		// | ||||
| 		context->Rip = reinterpret_cast<uint64_t>(resume_vmx); | ||||
| 	} | ||||
|  | ||||
| 	// | ||||
| 	// Restore the context to either ShvVmxResume, in which case the CPU's VMX | ||||
| 	// facility will do the "true" return back to the VM (but without restoring | ||||
| 	// GPRs, which is why we must do it here), or to the original guest's RIP, | ||||
| 	// which we use in case an exit was requested. In this case VMX must now be | ||||
| 	// off, and this will look like a longjmp to the original stack and RIP. | ||||
| 	// | ||||
| 	restore_context(context); | ||||
| } | ||||
|  | ||||
| @@ -684,14 +508,8 @@ void setup_vmcs_for_cpu(vmx::state& vm_state) | ||||
| 	auto* state = &launch_context->special_registers; | ||||
| 	auto* context = &launch_context->context_frame; | ||||
|  | ||||
| 	// | ||||
| 	// Begin by setting the link pointer to the required value for 4KB VMCS. | ||||
| 	// | ||||
| 	__vmx_vmwrite(VMCS_GUEST_VMCS_LINK_POINTER, ~0ULL); | ||||
|  | ||||
| 	// | ||||
| 	// Enable EPT features if supported | ||||
| 	// | ||||
| 	if (launch_context->ept_controls.flags != 0) | ||||
| 	{ | ||||
| 		const auto vmx_eptp = vm_state.ept.get_ept_pointer(); | ||||
| @@ -699,21 +517,8 @@ void setup_vmcs_for_cpu(vmx::state& vm_state) | ||||
| 		__vmx_vmwrite(VMCS_CTRL_VIRTUAL_PROCESSOR_IDENTIFIER, 1); | ||||
| 	} | ||||
|  | ||||
| 	// | ||||
| 	// Load the MSR bitmap. Unlike other bitmaps, not having an MSR bitmap will | ||||
| 	// trap all MSRs, so we allocated an empty one. | ||||
| 	// | ||||
| 	__vmx_vmwrite(VMCS_CTRL_MSR_BITMAP_ADDRESS, launch_context->msr_bitmap_physical_address); | ||||
|  | ||||
| 	// | ||||
| 	// Enable support for RDTSCP and XSAVES/XRESTORES in the guest. Windows 10 | ||||
| 	// makes use of both of these instructions if the CPU supports it. By using | ||||
| 	// ShvUtilAdjustMsr, these options will be ignored if this processor does | ||||
| 	// not actually support the instructions to begin with. | ||||
| 	// | ||||
| 	// Also enable EPT support, for additional performance and ability to trap | ||||
| 	// memory access efficiently. | ||||
| 	// | ||||
| 	auto ept_controls = launch_context->ept_controls; | ||||
| 	ept_controls.enable_rdtscp = 1; | ||||
| 	ept_controls.enable_invpcid = 1; | ||||
| @@ -721,17 +526,8 @@ void setup_vmcs_for_cpu(vmx::state& vm_state) | ||||
| 	__vmx_vmwrite(VMCS_CTRL_SECONDARY_PROCESSOR_BASED_VM_EXECUTION_CONTROLS, | ||||
| 	              adjust_msr(launch_context->msr_data[11], ept_controls.flags)); | ||||
|  | ||||
| 	// | ||||
| 	// Enable no pin-based options ourselves, but there may be some required by | ||||
| 	// the processor. Use ShvUtilAdjustMsr to add those in. | ||||
| 	// | ||||
| 	__vmx_vmwrite(VMCS_CTRL_PIN_BASED_VM_EXECUTION_CONTROLS, adjust_msr(launch_context->msr_data[13], 0)); | ||||
|  | ||||
| 	// | ||||
| 	// In order for our choice of supporting RDTSCP and XSAVE/RESTORES above to | ||||
| 	// actually mean something, we have to request secondary controls. We also | ||||
| 	// want to activate the MSR bitmap in order to keep them from being caught. | ||||
| 	// | ||||
| 	ia32_vmx_procbased_ctls_register procbased_ctls_register{}; | ||||
| 	procbased_ctls_register.activate_secondary_controls = 1; | ||||
| 	procbased_ctls_register.use_msr_bitmaps = 1; | ||||
| @@ -740,27 +536,18 @@ void setup_vmcs_for_cpu(vmx::state& vm_state) | ||||
| 	              adjust_msr(launch_context->msr_data[14], | ||||
| 	                         procbased_ctls_register.flags)); | ||||
|  | ||||
| 	// | ||||
| 	// Make sure to enter us in x64 mode at all times. | ||||
| 	// | ||||
| 	ia32_vmx_exit_ctls_register exit_ctls_register{}; | ||||
| 	exit_ctls_register.host_address_space_size = 1; | ||||
| 	__vmx_vmwrite(VMCS_CTRL_VMEXIT_CONTROLS, | ||||
| 	              adjust_msr(launch_context->msr_data[15], | ||||
| 	                         exit_ctls_register.flags)); | ||||
|  | ||||
| 	// | ||||
| 	// As we exit back into the guest, make sure to remain in x64 mode as well. | ||||
| 	// | ||||
| 	ia32_vmx_entry_ctls_register entry_ctls_register{}; | ||||
| 	entry_ctls_register.ia32e_mode_guest = 1; | ||||
| 	__vmx_vmwrite(VMCS_CTRL_VMENTRY_CONTROLS, | ||||
| 	              adjust_msr(launch_context->msr_data[16], | ||||
| 	                         entry_ctls_register.flags)); | ||||
|  | ||||
| 	// | ||||
| 	// Load the CS Segment (Ring 0 Code) | ||||
| 	// | ||||
| 	vmx::gdt_entry gdt_entry{}; | ||||
| 	gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegCs); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_CS_SELECTOR, gdt_entry.selector.flags); | ||||
| @@ -769,9 +556,6 @@ void setup_vmcs_for_cpu(vmx::state& vm_state) | ||||
| 	__vmx_vmwrite(VMCS_GUEST_CS_BASE, gdt_entry.base); | ||||
| 	__vmx_vmwrite(VMCS_HOST_CS_SELECTOR, context->SegCs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK); | ||||
|  | ||||
| 	// | ||||
| 	// Load the SS Segment (Ring 0 Data) | ||||
| 	// | ||||
| 	gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegSs); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_SS_SELECTOR, gdt_entry.selector.flags); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_SS_LIMIT, gdt_entry.limit); | ||||
| @@ -779,9 +563,6 @@ void setup_vmcs_for_cpu(vmx::state& vm_state) | ||||
| 	__vmx_vmwrite(VMCS_GUEST_SS_BASE, gdt_entry.base); | ||||
| 	__vmx_vmwrite(VMCS_HOST_SS_SELECTOR, context->SegSs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK); | ||||
|  | ||||
| 	// | ||||
| 	// Load the DS Segment (Ring 3 Data) | ||||
| 	// | ||||
| 	gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegDs); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_DS_SELECTOR, gdt_entry.selector.flags); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_DS_LIMIT, gdt_entry.limit); | ||||
| @@ -789,9 +570,6 @@ void setup_vmcs_for_cpu(vmx::state& vm_state) | ||||
| 	__vmx_vmwrite(VMCS_GUEST_DS_BASE, gdt_entry.base); | ||||
| 	__vmx_vmwrite(VMCS_HOST_DS_SELECTOR, context->SegDs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK); | ||||
|  | ||||
| 	// | ||||
| 	// Load the ES Segment (Ring 3 Data) | ||||
| 	// | ||||
| 	gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegEs); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_ES_SELECTOR, gdt_entry.selector.flags); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_ES_LIMIT, gdt_entry.limit); | ||||
| @@ -799,9 +577,6 @@ void setup_vmcs_for_cpu(vmx::state& vm_state) | ||||
| 	__vmx_vmwrite(VMCS_GUEST_ES_BASE, gdt_entry.base); | ||||
| 	__vmx_vmwrite(VMCS_HOST_ES_SELECTOR, context->SegEs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK); | ||||
|  | ||||
| 	// | ||||
| 	// Load the FS Segment (Ring 3 Compatibility-Mode TEB) | ||||
| 	// | ||||
| 	gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegFs); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_FS_SELECTOR, gdt_entry.selector.flags); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_FS_LIMIT, gdt_entry.limit); | ||||
| @@ -810,9 +585,6 @@ void setup_vmcs_for_cpu(vmx::state& vm_state) | ||||
| 	__vmx_vmwrite(VMCS_HOST_FS_BASE, gdt_entry.base); | ||||
| 	__vmx_vmwrite(VMCS_HOST_FS_SELECTOR, context->SegFs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK); | ||||
|  | ||||
| 	// | ||||
| 	// Load the GS Segment (Ring 3 Data if in Compatibility-Mode, MSR-based in Long Mode) | ||||
| 	// | ||||
| 	gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegGs); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_GS_SELECTOR, gdt_entry.selector.flags); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_GS_LIMIT, gdt_entry.limit); | ||||
| @@ -821,9 +593,6 @@ void setup_vmcs_for_cpu(vmx::state& vm_state) | ||||
| 	__vmx_vmwrite(VMCS_HOST_GS_BASE, state->msr_gs_base); | ||||
| 	__vmx_vmwrite(VMCS_HOST_GS_SELECTOR, context->SegGs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK); | ||||
|  | ||||
| 	// | ||||
| 	// Load the Task Register (Ring 0 TSS) | ||||
| 	// | ||||
| 	gdt_entry = convert_gdt_entry(state->gdtr.base_address, state->tr); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_TR_SELECTOR, gdt_entry.selector.flags); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_TR_LIMIT, gdt_entry.limit); | ||||
| @@ -832,77 +601,40 @@ void setup_vmcs_for_cpu(vmx::state& vm_state) | ||||
| 	__vmx_vmwrite(VMCS_HOST_TR_BASE, gdt_entry.base); | ||||
| 	__vmx_vmwrite(VMCS_HOST_TR_SELECTOR, state->tr & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK); | ||||
|  | ||||
| 	// | ||||
| 	// Load the Local Descriptor Table (Ring 0 LDT on Redstone) | ||||
| 	// | ||||
| 	gdt_entry = convert_gdt_entry(state->gdtr.base_address, state->ldtr); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_LDTR_SELECTOR, gdt_entry.selector.flags); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_LDTR_LIMIT, gdt_entry.limit); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_LDTR_ACCESS_RIGHTS, gdt_entry.access_rights.flags); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_LDTR_BASE, gdt_entry.base); | ||||
|  | ||||
| 	// | ||||
| 	// Now load the GDT itself | ||||
| 	// | ||||
| 	__vmx_vmwrite(VMCS_GUEST_GDTR_BASE, state->gdtr.base_address); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_GDTR_LIMIT, state->gdtr.limit); | ||||
| 	__vmx_vmwrite(VMCS_HOST_GDTR_BASE, state->gdtr.base_address); | ||||
|  | ||||
| 	// | ||||
| 	// And then the IDT | ||||
| 	// | ||||
| 	__vmx_vmwrite(VMCS_GUEST_IDTR_BASE, state->idtr.base_address); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_IDTR_LIMIT, state->idtr.limit); | ||||
| 	__vmx_vmwrite(VMCS_HOST_IDTR_BASE, state->idtr.base_address); | ||||
|  | ||||
| 	// | ||||
| 	// Load CR0 | ||||
| 	// | ||||
| 	__vmx_vmwrite(VMCS_CTRL_CR0_READ_SHADOW, state->cr0); | ||||
| 	__vmx_vmwrite(VMCS_HOST_CR0, state->cr0); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_CR0, state->cr0); | ||||
|  | ||||
| 	// | ||||
| 	// Load CR3 -- do not use the current process' address space for the host, | ||||
| 	// because we may be executing in an arbitrary user-mode process right now | ||||
| 	// as part of the DPC interrupt we execute in. | ||||
| 	// | ||||
| 	__vmx_vmwrite(VMCS_HOST_CR3, launch_context->system_directory_table_base); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_CR3, state->cr3); | ||||
|  | ||||
| 	// | ||||
| 	// Load CR4 | ||||
| 	// | ||||
| 	__vmx_vmwrite(VMCS_HOST_CR4, state->cr4); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_CR4, state->cr4); | ||||
| 	__vmx_vmwrite(VMCS_CTRL_CR4_READ_SHADOW, state->cr4); | ||||
|  | ||||
| 	// | ||||
| 	// Load debug MSR and register (DR7) | ||||
| 	// | ||||
| 	__vmx_vmwrite(VMCS_GUEST_DEBUGCTL, state->debug_control); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_DR7, state->kernel_dr7); | ||||
|  | ||||
| 	// | ||||
| 	// Finally, load the guest stack, instruction pointer, and rflags, which | ||||
| 	// corresponds exactly to the location where RtlCaptureContext will return | ||||
| 	// to inside of ShvVpInitialize. | ||||
| 	// | ||||
| 	const auto stack_pointer = reinterpret_cast<uintptr_t>(vm_state.stack_buffer) + KERNEL_STACK_SIZE - sizeof(CONTEXT); | ||||
|  | ||||
| 	__vmx_vmwrite(VMCS_GUEST_RSP, stack_pointer); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_RIP, reinterpret_cast<uintptr_t>(vm_launch)); | ||||
| 	__vmx_vmwrite(VMCS_GUEST_RFLAGS, context->EFlags); | ||||
|  | ||||
| 	// | ||||
| 	// Load the hypervisor entrypoint and stack. We give ourselves a standard | ||||
| 	// size kernel stack (24KB) and bias for the context structure that the | ||||
| 	// hypervisor entrypoint will push on the stack, avoiding the need for RSP | ||||
| 	// modifying instructions in the entrypoint. Note that the CONTEXT pointer | ||||
| 	// and thus the stack itself, must be 16-byte aligned for ABI compatibility | ||||
| 	// with AMD64 -- specifically, XMM operations will fail otherwise, such as | ||||
| 	// the ones that RtlCaptureContext will perform. | ||||
| 	// | ||||
| 	C_ASSERT((KERNEL_STACK_SIZE - sizeof(CONTEXT)) % 16 == 0); | ||||
| 	__vmx_vmwrite(VMCS_HOST_RSP, stack_pointer); | ||||
| 	__vmx_vmwrite(VMCS_HOST_RIP, reinterpret_cast<uintptr_t>(vm_exit)); | ||||
| @@ -922,11 +654,7 @@ void initialize_msrs(vmx::launch_context& launch_context) | ||||
| 	initialize_msrs(vm_state.launch_context); | ||||
| 	vm_state.ept.initialize(); | ||||
|  | ||||
| 	if (!enter_root_mode_on_cpu(vm_state)) | ||||
| 	{ | ||||
| 		throw std::runtime_error("Not available"); | ||||
| 	} | ||||
|  | ||||
| 	enter_root_mode_on_cpu(vm_state); | ||||
| 	setup_vmcs_for_cpu(vm_state); | ||||
|  | ||||
| 	auto error_code = launch_vmx(); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user