More cleanup

This commit is contained in:
momo5502 2022-04-23 10:25:54 +02:00
parent d2f2c9dfc5
commit 5c2886fea0
2 changed files with 14 additions and 304 deletions

View File

@ -33,43 +33,25 @@ namespace vmx
void initialize_mtrr(mtrr_list& mtrr_data) void initialize_mtrr(mtrr_list& mtrr_data)
{ {
//
// Read the capabilities mask
//
ia32_mtrr_capabilities_register mtrr_capabilities{}; ia32_mtrr_capabilities_register mtrr_capabilities{};
mtrr_capabilities.flags = __readmsr(IA32_MTRR_CAPABILITIES); mtrr_capabilities.flags = __readmsr(IA32_MTRR_CAPABILITIES);
//
// Iterate over each variable MTRR
//
for (auto i = 0u; i < mtrr_capabilities.variable_range_count; i++) for (auto i = 0u; i < mtrr_capabilities.variable_range_count; i++)
{ {
//
// Capture the value
//
ia32_mtrr_physbase_register mtrr_base{}; ia32_mtrr_physbase_register mtrr_base{};
ia32_mtrr_physmask_register mtrr_mask{}; ia32_mtrr_physmask_register mtrr_mask{};
mtrr_base.flags = __readmsr(IA32_MTRR_PHYSBASE0 + i * 2); mtrr_base.flags = __readmsr(IA32_MTRR_PHYSBASE0 + i * 2);
mtrr_mask.flags = __readmsr(IA32_MTRR_PHYSMASK0 + i * 2); mtrr_mask.flags = __readmsr(IA32_MTRR_PHYSMASK0 + i * 2);
//
// Check if the MTRR is enabled
//
mtrr_data[i].type = static_cast<uint32_t>(mtrr_base.type); mtrr_data[i].type = static_cast<uint32_t>(mtrr_base.type);
mtrr_data[i].enabled = static_cast<uint32_t>(mtrr_mask.valid); mtrr_data[i].enabled = static_cast<uint32_t>(mtrr_mask.valid);
if (mtrr_data[i].enabled != FALSE) if (mtrr_data[i].enabled != FALSE)
{ {
//
// Set the base
//
mtrr_data[i].physical_address_min = mtrr_base.page_frame_number * mtrr_data[i].physical_address_min = mtrr_base.page_frame_number *
MTRR_PAGE_SIZE; MTRR_PAGE_SIZE;
// unsigned long bit{};
// Compute the length
//
unsigned long bit;
_BitScanForward64(&bit, mtrr_mask.page_frame_number * MTRR_PAGE_SIZE); _BitScanForward64(&bit, mtrr_mask.page_frame_number * MTRR_PAGE_SIZE);
mtrr_data[i].physical_address_max = mtrr_data[i]. mtrr_data[i].physical_address_max = mtrr_data[i].
physical_address_min + physical_address_min +

View File

@ -239,42 +239,30 @@ bool hypervisor::try_enable_core(const uint64_t system_directory_table_base)
} }
} }
bool enter_root_mode_on_cpu(vmx::state& vm_state) void enter_root_mode_on_cpu(vmx::state& vm_state)
{ {
auto* launch_context = &vm_state.launch_context; auto* launch_context = &vm_state.launch_context;
auto* registers = &launch_context->special_registers; auto* registers = &launch_context->special_registers;
//
// Ensure the the VMCS can fit into a single page
//
ia32_vmx_basic_register basic_register{}; ia32_vmx_basic_register basic_register{};
memset(&basic_register, 0, sizeof(basic_register)); memset(&basic_register, 0, sizeof(basic_register));
basic_register.flags = launch_context->msr_data[0].QuadPart; basic_register.flags = launch_context->msr_data[0].QuadPart;
if (basic_register.vmcs_size_in_bytes > static_cast<uint64_t>(PAGE_SIZE)) if (basic_register.vmcs_size_in_bytes > static_cast<uint64_t>(PAGE_SIZE))
{ {
return false; throw std::runtime_error("VMCS exceeds page size");
} }
//
// Ensure that the VMCS is supported in writeback memory
//
if (basic_register.memory_type != static_cast<uint64_t>(MEMORY_TYPE_WRITE_BACK)) if (basic_register.memory_type != static_cast<uint64_t>(MEMORY_TYPE_WRITE_BACK))
{ {
return false; throw std::runtime_error("VMCS memory type must be write-back");
} }
//
// Ensure that true MSRs can be used for capabilities
//
if (basic_register.must_be_zero) if (basic_register.must_be_zero)
{ {
return false; throw std::runtime_error("Must-be-zero bit is not zero :O");
} }
//
// Ensure that EPT is available with the needed features SimpleVisor uses
//
ia32_vmx_ept_vpid_cap_register ept_vpid_cap_register{}; ia32_vmx_ept_vpid_cap_register ept_vpid_cap_register{};
ept_vpid_cap_register.flags = launch_context->msr_data[12].QuadPart; ept_vpid_cap_register.flags = launch_context->msr_data[12].QuadPart;
@ -282,75 +270,46 @@ bool enter_root_mode_on_cpu(vmx::state& vm_state)
ept_vpid_cap_register.memory_type_write_back && ept_vpid_cap_register.memory_type_write_back &&
ept_vpid_cap_register.pde_2mb_pages) ept_vpid_cap_register.pde_2mb_pages)
{ {
//
// Enable EPT if these features are supported
//
launch_context->ept_controls.flags = 0; launch_context->ept_controls.flags = 0;
launch_context->ept_controls.enable_ept = 1; launch_context->ept_controls.enable_ept = 1;
launch_context->ept_controls.enable_vpid = 1; launch_context->ept_controls.enable_vpid = 1;
} }
//
// Capture the revision ID for the VMXON and VMCS region
//
vm_state.vmx_on.revision_id = launch_context->msr_data[0].LowPart; vm_state.vmx_on.revision_id = launch_context->msr_data[0].LowPart;
vm_state.vmcs.revision_id = launch_context->msr_data[0].LowPart; vm_state.vmcs.revision_id = launch_context->msr_data[0].LowPart;
//
// Store the physical addresses of all per-LP structures allocated
//
launch_context->vmx_on_physical_address = memory::get_physical_address(&vm_state.vmx_on); launch_context->vmx_on_physical_address = memory::get_physical_address(&vm_state.vmx_on);
launch_context->vmcs_physical_address = memory::get_physical_address(&vm_state.vmcs); launch_context->vmcs_physical_address = memory::get_physical_address(&vm_state.vmcs);
launch_context->msr_bitmap_physical_address = memory::get_physical_address(vm_state.msr_bitmap); launch_context->msr_bitmap_physical_address = memory::get_physical_address(vm_state.msr_bitmap);
//
// Update CR0 with the must-be-zero and must-be-one requirements
//
registers->cr0 &= launch_context->msr_data[7].LowPart; registers->cr0 &= launch_context->msr_data[7].LowPart;
registers->cr0 |= launch_context->msr_data[6].LowPart; registers->cr0 |= launch_context->msr_data[6].LowPart;
//
// Do the same for CR4
//
registers->cr4 &= launch_context->msr_data[9].LowPart; registers->cr4 &= launch_context->msr_data[9].LowPart;
registers->cr4 |= launch_context->msr_data[8].LowPart; registers->cr4 |= launch_context->msr_data[8].LowPart;
//
// Update host CR0 and CR4 based on the requirements above
//
__writecr0(registers->cr0); __writecr0(registers->cr0);
__writecr4(registers->cr4); __writecr4(registers->cr4);
//
// Enable VMX Root Mode
//
if (__vmx_on(&launch_context->vmx_on_physical_address)) if (__vmx_on(&launch_context->vmx_on_physical_address))
{ {
return false; throw std::runtime_error("Failed to execute vmx_on");
} }
// auto destructor = utils::finally(__vmx_off);
// Clear the state of the VMCS, setting it to Inactive
//
if (__vmx_vmclear(&launch_context->vmcs_physical_address)) if (__vmx_vmclear(&launch_context->vmcs_physical_address))
{ {
__vmx_off();
return false; throw std::runtime_error("Failed to clear vmcs");
} }
//
// Load the VMCS, setting its state to Active
//
if (__vmx_vmptrld(&launch_context->vmcs_physical_address)) if (__vmx_vmptrld(&launch_context->vmcs_physical_address))
{ {
__vmx_off(); throw std::runtime_error("Failed to load vmcs");
return false;
} }
// destructor.cancel();
// VMX Root Mode is enabled, with an active VMCS.
//
return true;
} }
vmx::gdt_entry convert_gdt_entry(const uint64_t gdt_base, const uint16_t selector_value) vmx::gdt_entry convert_gdt_entry(const uint64_t gdt_base, const uint16_t selector_value)
@ -361,9 +320,6 @@ vmx::gdt_entry convert_gdt_entry(const uint64_t gdt_base, const uint16_t selecto
segment_selector selector{}; segment_selector selector{};
selector.flags = selector_value; selector.flags = selector_value;
//
// Reject LDT or NULL entries
//
if (selector.flags == 0 || selector.table) if (selector.flags == 0 || selector.table)
{ {
result.limit = 0; result.limit = 0;
@ -374,31 +330,11 @@ vmx::gdt_entry convert_gdt_entry(const uint64_t gdt_base, const uint16_t selecto
return result; return result;
} }
// const auto* gdt_entry = reinterpret_cast<segment_descriptor_64*>(gdt_base + static_cast<uint64_t>(selector.index) * 8);
// Read the GDT entry at the given selector, masking out the RPL bits.
//
const auto* gdt_entry = reinterpret_cast<segment_descriptor_64*>(gdt_base + static_cast<uint64_t>(selector.index) *
8);
//
// Write the selector directly
//
result.selector = selector; result.selector = selector;
//
// Use the LSL intrinsic to read the segment limit
//
result.limit = __segmentlimit(selector.flags); result.limit = __segmentlimit(selector.flags);
//
// Build the full 64-bit effective address, keeping in mind that only when
// the System bit is unset, should this be done.
//
// NOTE: The Windows definition of KGDTENTRY64 is WRONG. The "System" field
// is incorrectly defined at the position of where the AVL bit should be.
// The actual location of the SYSTEM bit is encoded as the highest bit in
// the "Type" field.
//
result.base = 0; result.base = 0;
result.base |= static_cast<uint64_t>(gdt_entry->base_address_low); result.base |= static_cast<uint64_t>(gdt_entry->base_address_low);
result.base |= static_cast<uint64_t>(gdt_entry->base_address_middle) << 16; result.base |= static_cast<uint64_t>(gdt_entry->base_address_middle) << 16;
@ -408,9 +344,6 @@ vmx::gdt_entry convert_gdt_entry(const uint64_t gdt_base, const uint16_t selecto
result.base |= static_cast<uint64_t>(gdt_entry->base_address_upper) << 32; result.base |= static_cast<uint64_t>(gdt_entry->base_address_upper) << 32;
} }
//
// Load the access rights
//
result.access_rights.flags = 0; result.access_rights.flags = 0;
result.access_rights.type = gdt_entry->type; result.access_rights.type = gdt_entry->type;
@ -423,9 +356,6 @@ vmx::gdt_entry convert_gdt_entry(const uint64_t gdt_base, const uint16_t selecto
result.access_rights.default_big = gdt_entry->default_big; result.access_rights.default_big = gdt_entry->default_big;
result.access_rights.granularity = gdt_entry->granularity; result.access_rights.granularity = gdt_entry->granularity;
//
// Finally, handle the VMX-specific bits
//
result.access_rights.reserved1 = 0; result.access_rights.reserved1 = 0;
result.access_rights.unusable = !gdt_entry->present; result.access_rights.unusable = !gdt_entry->present;
@ -434,11 +364,6 @@ vmx::gdt_entry convert_gdt_entry(const uint64_t gdt_base, const uint16_t selecto
uint32_t adjust_msr(const ULARGE_INTEGER control_value, const uint64_t desired_value) uint32_t adjust_msr(const ULARGE_INTEGER control_value, const uint64_t desired_value)
{ {
//
// VMX feature/capability MSRs encode the "must be 0" bits in the high word
// of their value, and the "must be 1" bits in the low word of their value.
// Adjust any requested capability/feature based on these requirements.
//
auto result = static_cast<uint32_t>(desired_value); auto result = static_cast<uint32_t>(desired_value);
result &= control_value.HighPart; result &= control_value.HighPart;
result |= control_value.LowPart; result |= control_value.LowPart;
@ -447,13 +372,6 @@ uint32_t adjust_msr(const ULARGE_INTEGER control_value, const uint64_t desired_v
void vmx_handle_invd() void vmx_handle_invd()
{ {
//
// This is the handler for the INVD instruction. Technically it may be more
// correct to use __invd instead of __wbinvd, but that intrinsic doesn't
// actually exist. Additionally, the Windows kernel (or HAL) don't contain
// any example of INVD actually ever being used. Finally, Hyper-V itself
// handles INVD by issuing WBINVD as well, so we'll just do that here too.
//
__wbinvd(); __wbinvd();
} }
@ -464,12 +382,6 @@ void vmx_handle_cpuid(vmx::guest_context& guest_context)
{ {
INT32 cpu_info[4]; INT32 cpu_info[4];
//
// Check for the magic CPUID sequence, and check that it is coming from
// Ring 0. Technically we could also check the RIP and see if this falls
// in the expected function, but we may want to allow a separate "unload"
// driver or code at some point.
//
if ((guest_context.vp_regs->Rax == 0x41414141) && if ((guest_context.vp_regs->Rax == 0x41414141) &&
(guest_context.vp_regs->Rcx == 0x42424242) && (guest_context.vp_regs->Rcx == 0x42424242) &&
((read_vmx(VMCS_GUEST_CS_SELECTOR) & SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK) == DPL_SYSTEM)) ((read_vmx(VMCS_GUEST_CS_SELECTOR) & SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK) == DPL_SYSTEM))
@ -478,35 +390,18 @@ void vmx_handle_cpuid(vmx::guest_context& guest_context)
return; return;
} }
//
// Otherwise, issue the CPUID to the logical processor based on the indexes
// on the VP's GPRs.
//
__cpuidex(cpu_info, static_cast<int32_t>(guest_context.vp_regs->Rax), __cpuidex(cpu_info, static_cast<int32_t>(guest_context.vp_regs->Rax),
static_cast<int32_t>(guest_context.vp_regs->Rcx)); static_cast<int32_t>(guest_context.vp_regs->Rcx));
//
// Check if this was CPUID 1h, which is the features request.
//
if (guest_context.vp_regs->Rax == 1) if (guest_context.vp_regs->Rax == 1)
{ {
//
// Set the Hypervisor Present-bit in RCX, which Intel and AMD have both
// reserved for this indication.
//
cpu_info[2] |= HYPERV_HYPERVISOR_PRESENT_BIT; cpu_info[2] |= HYPERV_HYPERVISOR_PRESENT_BIT;
} }
else if (guest_context.vp_regs->Rax == HYPERV_CPUID_INTERFACE) else if (guest_context.vp_regs->Rax == HYPERV_CPUID_INTERFACE)
{ {
//
// Return our interface identifier
//
cpu_info[0] = 'momo'; cpu_info[0] = 'momo';
} }
//
// Copy the values from the logical processor registers into the VP GPRs.
//
guest_context.vp_regs->Rax = cpu_info[0]; guest_context.vp_regs->Rax = cpu_info[0];
guest_context.vp_regs->Rbx = cpu_info[1]; guest_context.vp_regs->Rbx = cpu_info[1];
guest_context.vp_regs->Rcx = cpu_info[2]; guest_context.vp_regs->Rcx = cpu_info[2];
@ -515,36 +410,18 @@ void vmx_handle_cpuid(vmx::guest_context& guest_context)
void vmx_handle_xsetbv(const vmx::guest_context& guest_context) void vmx_handle_xsetbv(const vmx::guest_context& guest_context)
{ {
//
// Simply issue the XSETBV instruction on the native logical processor.
//
_xsetbv(static_cast<uint32_t>(guest_context.vp_regs->Rcx), _xsetbv(static_cast<uint32_t>(guest_context.vp_regs->Rcx),
guest_context.vp_regs->Rdx << 32 | guest_context.vp_regs->Rax); guest_context.vp_regs->Rdx << 32 | guest_context.vp_regs->Rax);
} }
void vmx_handle_vmx(vmx::guest_context& guest_context) void vmx_handle_vmx(vmx::guest_context& guest_context)
{ {
//
// Set the CF flag, which is how VMX instructions indicate failure
//
guest_context.guest_e_flags |= 0x1; // VM_FAIL_INVALID guest_context.guest_e_flags |= 0x1; // VM_FAIL_INVALID
//
// RFLAGs is actually restored from the VMCS, so update it here
//
__vmx_vmwrite(VMCS_GUEST_RFLAGS, guest_context.guest_e_flags); __vmx_vmwrite(VMCS_GUEST_RFLAGS, guest_context.guest_e_flags);
} }
void vmx_dispatch_vm_exit(vmx::guest_context& guest_context, const vmx::state& vm_state) void vmx_dispatch_vm_exit(vmx::guest_context& guest_context, const vmx::state& vm_state)
{ {
//
// This is the generic VM-Exit handler. Decode the reason for the exit and
// call the appropriate handler. As per Intel specifications, given that we
// have requested no optional exits whatsoever, we should only see CPUID,
// INVD, XSETBV and other VMX instructions. GETSEC cannot happen as we do
// not run in SMX context.
//
switch (guest_context.exit_reason) switch (guest_context.exit_reason)
{ {
case VMX_EXIT_REASON_EXECUTE_CPUID: case VMX_EXIT_REASON_EXECUTE_CPUID:
@ -580,11 +457,6 @@ void vmx_dispatch_vm_exit(vmx::guest_context& guest_context, const vmx::state& v
break; break;
} }
//
// Move the instruction pointer to the next instruction after the one that
// caused the exit. Since we are not doing any special handling or changing
// of execution, this can be done for any exit reason.
//
if (guest_context.increment_rip) if (guest_context.increment_rip)
{ {
guest_context.guest_rip += read_vmx(VMCS_VMEXIT_INSTRUCTION_LENGTH); guest_context.guest_rip += read_vmx(VMCS_VMEXIT_INSTRUCTION_LENGTH);
@ -596,12 +468,6 @@ extern "C" [[ noreturn ]] void vm_exit_handler(CONTEXT* context)
{ {
auto* vm_state = resolve_vm_state_from_context(*context); auto* vm_state = resolve_vm_state_from_context(*context);
//
// Build a little stack context to make it easier to keep track of certain
// guest state, such as the RIP/RSP/RFLAGS, and the exit reason. The rest
// of the general purpose registers come from the context structure that we
// captured on our own with RtlCaptureContext in the assembly entrypoint.
//
vmx::guest_context guest_context{}; vmx::guest_context guest_context{};
guest_context.guest_e_flags = read_vmx(VMCS_GUEST_RFLAGS); guest_context.guest_e_flags = read_vmx(VMCS_GUEST_RFLAGS);
guest_context.guest_rip = read_vmx(VMCS_GUEST_RIP); guest_context.guest_rip = read_vmx(VMCS_GUEST_RIP);
@ -613,68 +479,26 @@ extern "C" [[ noreturn ]] void vm_exit_handler(CONTEXT* context)
guest_context.exit_vm = false; guest_context.exit_vm = false;
guest_context.increment_rip = true; guest_context.increment_rip = true;
//
// Call the generic handler
//
vmx_dispatch_vm_exit(guest_context, *vm_state); vmx_dispatch_vm_exit(guest_context, *vm_state);
//
// Did we hit the magic exit sequence, or should we resume back to the VM
// context?
//
if (guest_context.exit_vm) if (guest_context.exit_vm)
{ {
context->Rcx = 0x43434343; context->Rcx = 0x43434343;
//
// Perform any OS-specific CPU uninitialization work
//
restore_descriptor_tables(vm_state->launch_context); restore_descriptor_tables(vm_state->launch_context);
//
// Our callback routine may have interrupted an arbitrary user process,
// and therefore not a thread running with a systemwide page directory.
// Therefore if we return back to the original caller after turning off
// VMX, it will keep our current "host" CR3 value which we set on entry
// to the PML4 of the SYSTEM process. We want to return back with the
// correct value of the "guest" CR3, so that the currently executing
// process continues to run with its expected address space mappings.
//
__writecr3(read_vmx(VMCS_GUEST_CR3)); __writecr3(read_vmx(VMCS_GUEST_CR3));
//
// Finally, restore the stack, instruction pointer and EFLAGS to the
// original values present when the instruction causing our VM-Exit
// execute (such as ShvVpUninitialize). This will effectively act as
// a longjmp back to that location.
//
context->Rsp = guest_context.guest_rsp; context->Rsp = guest_context.guest_rsp;
context->Rip = guest_context.guest_rip; context->Rip = guest_context.guest_rip;
context->EFlags = static_cast<uint32_t>(guest_context.guest_e_flags); context->EFlags = static_cast<uint32_t>(guest_context.guest_e_flags);
//
// Turn off VMX root mode on this logical processor. We're done here.
//
__vmx_off(); __vmx_off();
} }
else else
{ {
//
// Return into a VMXRESUME intrinsic, which we broke out as its own
// function, in order to allow this to work. No assembly code will be
// needed as RtlRestoreContext will fix all the GPRs, and what we just
// did to RSP will take care of the rest.
//
context->Rip = reinterpret_cast<uint64_t>(resume_vmx); context->Rip = reinterpret_cast<uint64_t>(resume_vmx);
} }
//
// Restore the context to either ShvVmxResume, in which case the CPU's VMX
// facility will do the "true" return back to the VM (but without restoring
// GPRs, which is why we must do it here), or to the original guest's RIP,
// which we use in case an exit was requested. In this case VMX must now be
// off, and this will look like a longjmp to the original stack and RIP.
//
restore_context(context); restore_context(context);
} }
@ -684,14 +508,8 @@ void setup_vmcs_for_cpu(vmx::state& vm_state)
auto* state = &launch_context->special_registers; auto* state = &launch_context->special_registers;
auto* context = &launch_context->context_frame; auto* context = &launch_context->context_frame;
//
// Begin by setting the link pointer to the required value for 4KB VMCS.
//
__vmx_vmwrite(VMCS_GUEST_VMCS_LINK_POINTER, ~0ULL); __vmx_vmwrite(VMCS_GUEST_VMCS_LINK_POINTER, ~0ULL);
//
// Enable EPT features if supported
//
if (launch_context->ept_controls.flags != 0) if (launch_context->ept_controls.flags != 0)
{ {
const auto vmx_eptp = vm_state.ept.get_ept_pointer(); const auto vmx_eptp = vm_state.ept.get_ept_pointer();
@ -699,21 +517,8 @@ void setup_vmcs_for_cpu(vmx::state& vm_state)
__vmx_vmwrite(VMCS_CTRL_VIRTUAL_PROCESSOR_IDENTIFIER, 1); __vmx_vmwrite(VMCS_CTRL_VIRTUAL_PROCESSOR_IDENTIFIER, 1);
} }
//
// Load the MSR bitmap. Unlike other bitmaps, not having an MSR bitmap will
// trap all MSRs, so we allocated an empty one.
//
__vmx_vmwrite(VMCS_CTRL_MSR_BITMAP_ADDRESS, launch_context->msr_bitmap_physical_address); __vmx_vmwrite(VMCS_CTRL_MSR_BITMAP_ADDRESS, launch_context->msr_bitmap_physical_address);
//
// Enable support for RDTSCP and XSAVES/XRESTORES in the guest. Windows 10
// makes use of both of these instructions if the CPU supports it. By using
// ShvUtilAdjustMsr, these options will be ignored if this processor does
// not actually support the instructions to begin with.
//
// Also enable EPT support, for additional performance and ability to trap
// memory access efficiently.
//
auto ept_controls = launch_context->ept_controls; auto ept_controls = launch_context->ept_controls;
ept_controls.enable_rdtscp = 1; ept_controls.enable_rdtscp = 1;
ept_controls.enable_invpcid = 1; ept_controls.enable_invpcid = 1;
@ -721,17 +526,8 @@ void setup_vmcs_for_cpu(vmx::state& vm_state)
__vmx_vmwrite(VMCS_CTRL_SECONDARY_PROCESSOR_BASED_VM_EXECUTION_CONTROLS, __vmx_vmwrite(VMCS_CTRL_SECONDARY_PROCESSOR_BASED_VM_EXECUTION_CONTROLS,
adjust_msr(launch_context->msr_data[11], ept_controls.flags)); adjust_msr(launch_context->msr_data[11], ept_controls.flags));
//
// Enable no pin-based options ourselves, but there may be some required by
// the processor. Use ShvUtilAdjustMsr to add those in.
//
__vmx_vmwrite(VMCS_CTRL_PIN_BASED_VM_EXECUTION_CONTROLS, adjust_msr(launch_context->msr_data[13], 0)); __vmx_vmwrite(VMCS_CTRL_PIN_BASED_VM_EXECUTION_CONTROLS, adjust_msr(launch_context->msr_data[13], 0));
//
// In order for our choice of supporting RDTSCP and XSAVE/RESTORES above to
// actually mean something, we have to request secondary controls. We also
// want to activate the MSR bitmap in order to keep them from being caught.
//
ia32_vmx_procbased_ctls_register procbased_ctls_register{}; ia32_vmx_procbased_ctls_register procbased_ctls_register{};
procbased_ctls_register.activate_secondary_controls = 1; procbased_ctls_register.activate_secondary_controls = 1;
procbased_ctls_register.use_msr_bitmaps = 1; procbased_ctls_register.use_msr_bitmaps = 1;
@ -740,27 +536,18 @@ void setup_vmcs_for_cpu(vmx::state& vm_state)
adjust_msr(launch_context->msr_data[14], adjust_msr(launch_context->msr_data[14],
procbased_ctls_register.flags)); procbased_ctls_register.flags));
//
// Make sure to enter us in x64 mode at all times.
//
ia32_vmx_exit_ctls_register exit_ctls_register{}; ia32_vmx_exit_ctls_register exit_ctls_register{};
exit_ctls_register.host_address_space_size = 1; exit_ctls_register.host_address_space_size = 1;
__vmx_vmwrite(VMCS_CTRL_VMEXIT_CONTROLS, __vmx_vmwrite(VMCS_CTRL_VMEXIT_CONTROLS,
adjust_msr(launch_context->msr_data[15], adjust_msr(launch_context->msr_data[15],
exit_ctls_register.flags)); exit_ctls_register.flags));
//
// As we exit back into the guest, make sure to exist in x64 mode as well.
//
ia32_vmx_entry_ctls_register entry_ctls_register{}; ia32_vmx_entry_ctls_register entry_ctls_register{};
entry_ctls_register.ia32e_mode_guest = 1; entry_ctls_register.ia32e_mode_guest = 1;
__vmx_vmwrite(VMCS_CTRL_VMENTRY_CONTROLS, __vmx_vmwrite(VMCS_CTRL_VMENTRY_CONTROLS,
adjust_msr(launch_context->msr_data[16], adjust_msr(launch_context->msr_data[16],
entry_ctls_register.flags)); entry_ctls_register.flags));
//
// Load the CS Segment (Ring 0 Code)
//
vmx::gdt_entry gdt_entry{}; vmx::gdt_entry gdt_entry{};
gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegCs); gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegCs);
__vmx_vmwrite(VMCS_GUEST_CS_SELECTOR, gdt_entry.selector.flags); __vmx_vmwrite(VMCS_GUEST_CS_SELECTOR, gdt_entry.selector.flags);
@ -769,9 +556,6 @@ void setup_vmcs_for_cpu(vmx::state& vm_state)
__vmx_vmwrite(VMCS_GUEST_CS_BASE, gdt_entry.base); __vmx_vmwrite(VMCS_GUEST_CS_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_HOST_CS_SELECTOR, context->SegCs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK); __vmx_vmwrite(VMCS_HOST_CS_SELECTOR, context->SegCs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK);
//
// Load the SS Segment (Ring 0 Data)
//
gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegSs); gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegSs);
__vmx_vmwrite(VMCS_GUEST_SS_SELECTOR, gdt_entry.selector.flags); __vmx_vmwrite(VMCS_GUEST_SS_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_SS_LIMIT, gdt_entry.limit); __vmx_vmwrite(VMCS_GUEST_SS_LIMIT, gdt_entry.limit);
@ -779,9 +563,6 @@ void setup_vmcs_for_cpu(vmx::state& vm_state)
__vmx_vmwrite(VMCS_GUEST_SS_BASE, gdt_entry.base); __vmx_vmwrite(VMCS_GUEST_SS_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_HOST_SS_SELECTOR, context->SegSs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK); __vmx_vmwrite(VMCS_HOST_SS_SELECTOR, context->SegSs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK);
//
// Load the DS Segment (Ring 3 Data)
//
gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegDs); gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegDs);
__vmx_vmwrite(VMCS_GUEST_DS_SELECTOR, gdt_entry.selector.flags); __vmx_vmwrite(VMCS_GUEST_DS_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_DS_LIMIT, gdt_entry.limit); __vmx_vmwrite(VMCS_GUEST_DS_LIMIT, gdt_entry.limit);
@ -789,9 +570,6 @@ void setup_vmcs_for_cpu(vmx::state& vm_state)
__vmx_vmwrite(VMCS_GUEST_DS_BASE, gdt_entry.base); __vmx_vmwrite(VMCS_GUEST_DS_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_HOST_DS_SELECTOR, context->SegDs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK); __vmx_vmwrite(VMCS_HOST_DS_SELECTOR, context->SegDs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK);
//
// Load the ES Segment (Ring 3 Data)
//
gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegEs); gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegEs);
__vmx_vmwrite(VMCS_GUEST_ES_SELECTOR, gdt_entry.selector.flags); __vmx_vmwrite(VMCS_GUEST_ES_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_ES_LIMIT, gdt_entry.limit); __vmx_vmwrite(VMCS_GUEST_ES_LIMIT, gdt_entry.limit);
@ -799,9 +577,6 @@ void setup_vmcs_for_cpu(vmx::state& vm_state)
__vmx_vmwrite(VMCS_GUEST_ES_BASE, gdt_entry.base); __vmx_vmwrite(VMCS_GUEST_ES_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_HOST_ES_SELECTOR, context->SegEs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK); __vmx_vmwrite(VMCS_HOST_ES_SELECTOR, context->SegEs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK);
//
// Load the FS Segment (Ring 3 Compatibility-Mode TEB)
//
gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegFs); gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegFs);
__vmx_vmwrite(VMCS_GUEST_FS_SELECTOR, gdt_entry.selector.flags); __vmx_vmwrite(VMCS_GUEST_FS_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_FS_LIMIT, gdt_entry.limit); __vmx_vmwrite(VMCS_GUEST_FS_LIMIT, gdt_entry.limit);
@ -810,9 +585,6 @@ void setup_vmcs_for_cpu(vmx::state& vm_state)
__vmx_vmwrite(VMCS_HOST_FS_BASE, gdt_entry.base); __vmx_vmwrite(VMCS_HOST_FS_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_HOST_FS_SELECTOR, context->SegFs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK); __vmx_vmwrite(VMCS_HOST_FS_SELECTOR, context->SegFs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK);
//
// Load the GS Segment (Ring 3 Data if in Compatibility-Mode, MSR-based in Long Mode)
//
gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegGs); gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegGs);
__vmx_vmwrite(VMCS_GUEST_GS_SELECTOR, gdt_entry.selector.flags); __vmx_vmwrite(VMCS_GUEST_GS_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_GS_LIMIT, gdt_entry.limit); __vmx_vmwrite(VMCS_GUEST_GS_LIMIT, gdt_entry.limit);
@ -821,9 +593,6 @@ void setup_vmcs_for_cpu(vmx::state& vm_state)
__vmx_vmwrite(VMCS_HOST_GS_BASE, state->msr_gs_base); __vmx_vmwrite(VMCS_HOST_GS_BASE, state->msr_gs_base);
__vmx_vmwrite(VMCS_HOST_GS_SELECTOR, context->SegGs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK); __vmx_vmwrite(VMCS_HOST_GS_SELECTOR, context->SegGs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK);
//
// Load the Task Register (Ring 0 TSS)
//
gdt_entry = convert_gdt_entry(state->gdtr.base_address, state->tr); gdt_entry = convert_gdt_entry(state->gdtr.base_address, state->tr);
__vmx_vmwrite(VMCS_GUEST_TR_SELECTOR, gdt_entry.selector.flags); __vmx_vmwrite(VMCS_GUEST_TR_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_TR_LIMIT, gdt_entry.limit); __vmx_vmwrite(VMCS_GUEST_TR_LIMIT, gdt_entry.limit);
@ -832,77 +601,40 @@ void setup_vmcs_for_cpu(vmx::state& vm_state)
__vmx_vmwrite(VMCS_HOST_TR_BASE, gdt_entry.base); __vmx_vmwrite(VMCS_HOST_TR_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_HOST_TR_SELECTOR, state->tr & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK); __vmx_vmwrite(VMCS_HOST_TR_SELECTOR, state->tr & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK);
//
// Load the Local Descriptor Table (Ring 0 LDT on Redstone)
//
gdt_entry = convert_gdt_entry(state->gdtr.base_address, state->ldtr); gdt_entry = convert_gdt_entry(state->gdtr.base_address, state->ldtr);
__vmx_vmwrite(VMCS_GUEST_LDTR_SELECTOR, gdt_entry.selector.flags); __vmx_vmwrite(VMCS_GUEST_LDTR_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_LDTR_LIMIT, gdt_entry.limit); __vmx_vmwrite(VMCS_GUEST_LDTR_LIMIT, gdt_entry.limit);
__vmx_vmwrite(VMCS_GUEST_LDTR_ACCESS_RIGHTS, gdt_entry.access_rights.flags); __vmx_vmwrite(VMCS_GUEST_LDTR_ACCESS_RIGHTS, gdt_entry.access_rights.flags);
__vmx_vmwrite(VMCS_GUEST_LDTR_BASE, gdt_entry.base); __vmx_vmwrite(VMCS_GUEST_LDTR_BASE, gdt_entry.base);
//
// Now load the GDT itself
//
__vmx_vmwrite(VMCS_GUEST_GDTR_BASE, state->gdtr.base_address); __vmx_vmwrite(VMCS_GUEST_GDTR_BASE, state->gdtr.base_address);
__vmx_vmwrite(VMCS_GUEST_GDTR_LIMIT, state->gdtr.limit); __vmx_vmwrite(VMCS_GUEST_GDTR_LIMIT, state->gdtr.limit);
__vmx_vmwrite(VMCS_HOST_GDTR_BASE, state->gdtr.base_address); __vmx_vmwrite(VMCS_HOST_GDTR_BASE, state->gdtr.base_address);
//
// And then the IDT
//
__vmx_vmwrite(VMCS_GUEST_IDTR_BASE, state->idtr.base_address); __vmx_vmwrite(VMCS_GUEST_IDTR_BASE, state->idtr.base_address);
__vmx_vmwrite(VMCS_GUEST_IDTR_LIMIT, state->idtr.limit); __vmx_vmwrite(VMCS_GUEST_IDTR_LIMIT, state->idtr.limit);
__vmx_vmwrite(VMCS_HOST_IDTR_BASE, state->idtr.base_address); __vmx_vmwrite(VMCS_HOST_IDTR_BASE, state->idtr.base_address);
//
// Load CR0
//
__vmx_vmwrite(VMCS_CTRL_CR0_READ_SHADOW, state->cr0); __vmx_vmwrite(VMCS_CTRL_CR0_READ_SHADOW, state->cr0);
__vmx_vmwrite(VMCS_HOST_CR0, state->cr0); __vmx_vmwrite(VMCS_HOST_CR0, state->cr0);
__vmx_vmwrite(VMCS_GUEST_CR0, state->cr0); __vmx_vmwrite(VMCS_GUEST_CR0, state->cr0);
//
// Load CR3 -- do not use the current process' address space for the host,
// because we may be executing in an arbitrary user-mode process right now
// as part of the DPC interrupt we execute in.
//
__vmx_vmwrite(VMCS_HOST_CR3, launch_context->system_directory_table_base); __vmx_vmwrite(VMCS_HOST_CR3, launch_context->system_directory_table_base);
__vmx_vmwrite(VMCS_GUEST_CR3, state->cr3); __vmx_vmwrite(VMCS_GUEST_CR3, state->cr3);
//
// Load CR4
//
__vmx_vmwrite(VMCS_HOST_CR4, state->cr4); __vmx_vmwrite(VMCS_HOST_CR4, state->cr4);
__vmx_vmwrite(VMCS_GUEST_CR4, state->cr4); __vmx_vmwrite(VMCS_GUEST_CR4, state->cr4);
__vmx_vmwrite(VMCS_CTRL_CR4_READ_SHADOW, state->cr4); __vmx_vmwrite(VMCS_CTRL_CR4_READ_SHADOW, state->cr4);
//
// Load debug MSR and register (DR7)
//
__vmx_vmwrite(VMCS_GUEST_DEBUGCTL, state->debug_control); __vmx_vmwrite(VMCS_GUEST_DEBUGCTL, state->debug_control);
__vmx_vmwrite(VMCS_GUEST_DR7, state->kernel_dr7); __vmx_vmwrite(VMCS_GUEST_DR7, state->kernel_dr7);
//
// Finally, load the guest stack, instruction pointer, and rflags, which
// corresponds exactly to the location where RtlCaptureContext will return
// to inside of ShvVpInitialize.
//
const auto stack_pointer = reinterpret_cast<uintptr_t>(vm_state.stack_buffer) + KERNEL_STACK_SIZE - sizeof(CONTEXT); const auto stack_pointer = reinterpret_cast<uintptr_t>(vm_state.stack_buffer) + KERNEL_STACK_SIZE - sizeof(CONTEXT);
__vmx_vmwrite(VMCS_GUEST_RSP, stack_pointer); __vmx_vmwrite(VMCS_GUEST_RSP, stack_pointer);
__vmx_vmwrite(VMCS_GUEST_RIP, reinterpret_cast<uintptr_t>(vm_launch)); __vmx_vmwrite(VMCS_GUEST_RIP, reinterpret_cast<uintptr_t>(vm_launch));
__vmx_vmwrite(VMCS_GUEST_RFLAGS, context->EFlags); __vmx_vmwrite(VMCS_GUEST_RFLAGS, context->EFlags);
//
// Load the hypervisor entrypoint and stack. We give ourselves a standard
// size kernel stack (24KB) and bias for the context structure that the
// hypervisor entrypoint will push on the stack, avoiding the need for RSP
// modifying instructions in the entrypoint. Note that the CONTEXT pointer
// and thus the stack itself, must be 16-byte aligned for ABI compatibility
// with AMD64 -- specifically, XMM operations will fail otherwise, such as
// the ones that RtlCaptureContext will perform.
//
C_ASSERT((KERNEL_STACK_SIZE - sizeof(CONTEXT)) % 16 == 0); C_ASSERT((KERNEL_STACK_SIZE - sizeof(CONTEXT)) % 16 == 0);
__vmx_vmwrite(VMCS_HOST_RSP, stack_pointer); __vmx_vmwrite(VMCS_HOST_RSP, stack_pointer);
__vmx_vmwrite(VMCS_HOST_RIP, reinterpret_cast<uintptr_t>(vm_exit)); __vmx_vmwrite(VMCS_HOST_RIP, reinterpret_cast<uintptr_t>(vm_exit));
@ -922,11 +654,7 @@ void initialize_msrs(vmx::launch_context& launch_context)
initialize_msrs(vm_state.launch_context); initialize_msrs(vm_state.launch_context);
vm_state.ept.initialize(); vm_state.ept.initialize();
if (!enter_root_mode_on_cpu(vm_state)) enter_root_mode_on_cpu(vm_state);
{
throw std::runtime_error("Not available");
}
setup_vmcs_for_cpu(vm_state); setup_vmcs_for_cpu(vm_state);
auto error_code = launch_vmx(); auto error_code = launch_vmx();