// hypervisor/src/driver/hypervisor.cpp
#include "std_include.hpp"
#include "hypervisor.hpp"
#include "exception.hpp"
#include "logging.hpp"
#include "finally.hpp"
#include "memory.hpp"
#include "thread.hpp"
#include "assembly.hpp"
#include "process.hpp"
#include "string.hpp"
#define DPL_USER 3
#define DPL_SYSTEM 0
namespace
{
hypervisor* instance{nullptr};
bool is_vmx_supported()
{
cpuid_eax_01 data{};
__cpuid(reinterpret_cast<int*>(&data), CPUID_VERSION_INFORMATION);
return data.cpuid_feature_information_ecx.virtual_machine_extensions;
}
bool is_vmx_available()
{
ia32_feature_control_register feature_control{};
feature_control.flags = __readmsr(IA32_FEATURE_CONTROL);
return feature_control.lock_bit && feature_control.enable_vmx_outside_smx;
}
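// Detects whether this hypervisor is already running on the current core:
// first the CPUID hypervisor-present bit, then the vendor interface leaf,
// which vmx_handle_cpuid answers with the 'momo' signature.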
bool is_hypervisor_present()
{
cpuid_eax_01 data{};
__cpuid(reinterpret_cast<int*>(&data), CPUID_VERSION_INFORMATION);
if ((data.cpuid_feature_information_ecx.flags & HYPERV_HYPERVISOR_PRESENT_BIT) == 0)
{
return false;
}
int32_t cpuid_data[4] = {0};
__cpuid(cpuid_data, HYPERV_CPUID_INTERFACE);
return cpuid_data[0] == 'momo';
}
void capture_special_registers(vmx::special_registers& special_registers)
{
special_registers.cr0 = __readcr0();
special_registers.cr3 = __readcr3();
special_registers.cr4 = __readcr4();
special_registers.debug_control = __readmsr(IA32_DEBUGCTL);
special_registers.msr_gs_base = __readmsr(IA32_GS_BASE);
special_registers.kernel_dr7 = __readdr(7);
_sgdt(&special_registers.gdtr);
__sidt(&special_registers.idtr);
_str(&special_registers.tr);
_sldt(&special_registers.ldtr);
}
// This absolutely needs to be inlined. Otherwise the stack might be broken upon restoration
// See: https://github.com/ionescu007/SimpleVisor/issues/48
#define capture_cpu_context(launch_context) \
capture_special_registers((launch_context).special_registers);\
RtlCaptureContext(&(launch_context).context_frame)
void restore_descriptor_tables(vmx::launch_context& launch_context)
{
__lgdt(&launch_context.special_registers.gdtr);
__lidt(&launch_context.special_registers.idtr);
}
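// The captured CONTEXT sits at the very top of the per-core kernel stack,
// and the pointer arithmetic below assumes the stack buffer is the first
// member of vmx::state: stepping back KERNEL_STACK_SIZE bytes from the end
// of the CONTEXT therefore lands on the owning vmx::state.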
vmx::state* resolve_vm_state_from_context(CONTEXT& context)
{
auto* context_address = reinterpret_cast<uint8_t*>(&context);
auto* vm_state_address = context_address + sizeof(CONTEXT) - KERNEL_STACK_SIZE;
return reinterpret_cast<vmx::state*>(vm_state_address);
}
uintptr_t read_vmx(const uint32_t vmcs_field_id)
{
uintptr_t data{};
__vmx_vmread(vmcs_field_id, &data);
return data;
}
[[ noreturn ]] void resume_vmx()
{
// vmresume does not return on success; reaching the end of this
// [[noreturn]] function means the resume itself failed.
__vmx_vmresume();
}
int32_t launch_vmx()
{
__vmx_vmlaunch();
const auto error_code = static_cast<int32_t>(read_vmx(VMCS_VM_INSTRUCTION_ERROR));
__vmx_off();
return error_code;
}
extern "C" [[ noreturn ]] void vm_launch_handler(CONTEXT* context)
{
auto* vm_state = resolve_vm_state_from_context(*context);
vm_state->launch_context.context_frame.EFlags |= EFLAGS_ALIGNMENT_CHECK_FLAG_FLAG;
vm_state->launch_context.launched = true;
restore_context(&vm_state->launch_context.context_frame);
}
}
hypervisor::hypervisor()
{
if (instance != nullptr)
{
throw std::runtime_error("Hypervisor already instantiated");
}
auto destructor = utils::finally([this]()
{
this->free_vm_states();
instance = nullptr;
});
instance = this;
if (!is_vmx_supported())
{
throw std::runtime_error("VMX not supported on this machine");
}
if (!is_vmx_available())
{
throw std::runtime_error("VMX not available on this machine");
}
debug_log("VMX supported!\n");
this->allocate_vm_states();
this->enable();
destructor.cancel();
}
hypervisor::~hypervisor()
{
this->disable_all_ept_hooks();
this->disable();
this->free_vm_states();
instance = nullptr;
}
void hypervisor::disable()
{
thread::dispatch_on_all_cores([this]()
{
this->disable_core();
});
debug_log("Hypervisor disabled on all cores\n");
}
bool hypervisor::is_enabled() const
{
return is_hypervisor_present();
}
bool hypervisor::install_ept_hook(const void* destination, const void* source, const size_t length,
const process_id source_pid, const process_id target_pid,
const utils::list<vmx::ept_translation_hint>& hints)
{
try
{
this->ept_->install_hook(destination, source, length, source_pid, target_pid, hints);
}
catch (std::exception& e)
{
debug_log("Failed to install ept hook on core %d: %s\n", thread::get_processor_index(), e.what());
return false;
}
catch (...)
{
debug_log("Failed to install ept hook on core %d.\n", thread::get_processor_index());
return false;
}
this->invalidate_cores();
return true;
}
bool hypervisor::install_ept_code_watch_point(const uint64_t physical_page, const process_id source_pid,
const process_id target_pid, const bool invalidate) const
{
try
{
this->ept_->install_code_watch_point(physical_page, source_pid, target_pid);
}
catch (std::exception& e)
{
debug_log("Failed to install ept watch point on core %d: %s\n", thread::get_processor_index(), e.what());
return false;
}
catch (...)
{
debug_log("Failed to install ept watch point on core %d.\n", thread::get_processor_index());
return false;
}
if (invalidate)
{
thread::dispatch_on_all_cores([&]
{
this->ept_->invalidate();
});
}
return true;
}
bool hypervisor::install_ept_code_watch_points(const uint64_t* physical_pages, const size_t count,
const process_id source_pid, const process_id target_pid) const
{
bool success = true;
for (size_t i = 0; i < count; ++i)
{
success &= this->install_ept_code_watch_point(physical_pages[i], source_pid, target_pid, false);
}
thread::dispatch_on_all_cores([&]
{
this->ept_->invalidate();
});
return success;
}
void hypervisor::disable_all_ept_hooks() const
{
this->ept_->disable_all_hooks();
thread::dispatch_on_all_cores([&]
{
const auto* vm_state = this->get_current_vm_state();
if (!vm_state)
{
return;
}
if (this->is_enabled())
{
vm_state->ept->invalidate();
}
});
}
vmx::ept& hypervisor::get_ept() const
{
return *this->ept_;
}
hypervisor* hypervisor::get_instance()
{
return instance;
}
bool hypervisor::cleanup_process(const process_id process)
{
if (!this->ept_->cleanup_process(process))
{
return false;
}
this->invalidate_cores();
return true;
}
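// Initializes the shared EPT and virtualizes every core. A failure on any
// single core rolls the whole machine back to an un-virtualized state.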
void hypervisor::enable()
{
const auto cr3 = __readcr3();
this->ept_->initialize();
volatile long failures = 0;
thread::dispatch_on_all_cores([&]
{
if (!this->try_enable_core(cr3))
{
InterlockedIncrement(&failures);
}
});
if (failures)
{
this->disable();
throw std::runtime_error("Hypervisor initialization failed");
}
debug_log("Hypervisor enabled on %d cores\n", this->vm_state_count_);
}
bool hypervisor::try_enable_core(const uint64_t system_directory_table_base)
{
try
{
this->enable_core(system_directory_table_base);
return true;
}
catch (std::exception& e)
{
debug_log("Failed to enable hypervisor on core %d: %s\n", thread::get_processor_index(), e.what());
return false;
}
catch (...)
{
debug_log("Failed to enable hypervisor on core %d.\n", thread::get_processor_index());
return false;
}
}
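// Validates the VMX capability MSRs (VMCS size, required write-back memory
// type, must-be-zero bits), applies the mandatory CR0/CR4 fixed bits,
// executes VMXON and clears and loads this core's VMCS. Throws on any
// failure, with the scoped destructor making sure VMX is turned off again.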
void enter_root_mode_on_cpu(vmx::state& vm_state)
{
auto* launch_context = &vm_state.launch_context;
auto* registers = &launch_context->special_registers;
ia32_vmx_basic_register basic_register{};
basic_register.flags = launch_context->msr_data[0].QuadPart;
if (basic_register.vmcs_size_in_bytes > static_cast<uint64_t>(PAGE_SIZE))
{
throw std::runtime_error("VMCS exceeds page size");
}
if (basic_register.memory_type != static_cast<uint64_t>(MEMORY_TYPE_WRITE_BACK))
{
throw std::runtime_error("VMCS memory type must be write-back");
}
if (basic_register.must_be_zero)
{
throw std::runtime_error("Must-be-zero bit is not zero :O");
}
ia32_vmx_ept_vpid_cap_register ept_vpid_cap_register{};
ept_vpid_cap_register.flags = launch_context->msr_data[12].QuadPart;
if (ept_vpid_cap_register.page_walk_length_4 &&
ept_vpid_cap_register.memory_type_write_back &&
ept_vpid_cap_register.pde_2mb_pages)
{
launch_context->ept_controls.flags = 0;
launch_context->ept_controls.enable_ept = 1;
launch_context->ept_controls.enable_vpid = 1;
}
vm_state.vmx_on.revision_id = launch_context->msr_data[0].LowPart;
vm_state.vmcs.revision_id = launch_context->msr_data[0].LowPart;
launch_context->vmx_on_physical_address = memory::get_physical_address(&vm_state.vmx_on);
launch_context->vmcs_physical_address = memory::get_physical_address(&vm_state.vmcs);
launch_context->msr_bitmap_physical_address = memory::get_physical_address(vm_state.msr_bitmap);
registers->cr0 &= launch_context->msr_data[7].LowPart;
registers->cr0 |= launch_context->msr_data[6].LowPart;
registers->cr4 &= launch_context->msr_data[9].LowPart;
registers->cr4 |= launch_context->msr_data[8].LowPart;
__writecr0(registers->cr0);
__writecr4(registers->cr4);
if (__vmx_on(&launch_context->vmx_on_physical_address))
{
throw std::runtime_error("Failed to execute vmx_on");
}
auto destructor = utils::finally([]
{
__vmx_off();
});
if (__vmx_vmclear(&launch_context->vmcs_physical_address))
{
throw std::runtime_error("Failed to clear vmcs");
}
if (__vmx_vmptrld(&launch_context->vmcs_physical_address))
{
throw std::runtime_error("Failed to load vmcs");
}
destructor.cancel();
}
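// Translates a GDT selector into the selector/limit/base/access-rights
// quadruple expected by the VMCS guest segment fields. Null and
// LDT-relative selectors are reported as unusable.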
vmx::gdt_entry convert_gdt_entry(const uint64_t gdt_base, const uint16_t selector_value)
{
vmx::gdt_entry result{};
segment_selector selector{};
selector.flags = selector_value;
if (selector.flags == 0 || selector.table)
{
result.limit = 0;
result.access_rights.flags = 0;
result.base = 0;
result.selector.flags = 0;
result.access_rights.unusable = 1;
return result;
}
const auto* gdt_entry = reinterpret_cast<segment_descriptor_64*>(gdt_base + static_cast<uint64_t>(selector.index) *
8);
result.selector = selector;
result.limit = __segmentlimit(selector.flags);
result.base = 0;
result.base |= static_cast<uint64_t>(gdt_entry->base_address_low);
result.base |= static_cast<uint64_t>(gdt_entry->base_address_middle) << 16;
result.base |= static_cast<uint64_t>(gdt_entry->base_address_high) << 24;
if (gdt_entry->descriptor_type == 0u)
{
result.base |= static_cast<uint64_t>(gdt_entry->base_address_upper) << 32;
}
result.access_rights.flags = 0;
result.access_rights.type = gdt_entry->type;
result.access_rights.descriptor_type = gdt_entry->descriptor_type;
result.access_rights.descriptor_privilege_level = gdt_entry->descriptor_privilege_level;
result.access_rights.present = gdt_entry->present;
result.access_rights.available_bit = gdt_entry->system;
result.access_rights.long_mode = gdt_entry->long_mode;
result.access_rights.default_big = gdt_entry->default_big;
result.access_rights.granularity = gdt_entry->granularity;
// reserved1 must stay zero; the unusable bit mirrors !present.
result.access_rights.reserved1 = 0;
result.access_rights.unusable = ~gdt_entry->present;
return result;
}
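// VMX capability MSRs encode their constraints in two halves: bits set in
// the low dword must be 1 in the control, bits clear in the high dword
// must be 0. This masks a desired control value into a valid one.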
uint32_t adjust_msr(const ULARGE_INTEGER control_value, const uint64_t desired_value)
{
auto result = static_cast<uint32_t>(desired_value);
result &= control_value.HighPart;
result |= control_value.LowPart;
return result;
}
void vmx_handle_invd()
{
__wbinvd();
}
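// Experimental tracing for the targeted game: logs RDTSC/RDTSCP exits only
// when they originate from user mode, within the hard-coded image range,
// and from the HogwartsLegacy process.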
bool log_other_call(uintptr_t guest_rip, bool rdtsc)
{
if (guest_rip < 0x140000000 || guest_rip > 0x15BC27000)
{
return false;
}
const auto is_privileged = (read_vmx(VMCS_GUEST_CS_SELECTOR) &
SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK) == DPL_SYSTEM;
if (is_privileged)
{
return false;
}
const auto proc = process::get_current_process();
const auto filename = proc.get_image_filename();
if (!string::equal(filename, "HogwartsLegacy"))
{
return false;
}
debug_log("%s (%s): %llX\n", rdtsc ? "RDTSC" : "RDTSCP",
filename,
guest_rip);
return true;
}
bool log_cpuid_call(uintptr_t guest_rip, uintptr_t rax, uintptr_t rcx, const INT32* cpu_info)
{
if (guest_rip < 0x140000000 || guest_rip > 0x15BC27000)
{
return false;
}
const auto is_privileged = (read_vmx(VMCS_GUEST_CS_SELECTOR) &
SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK) == DPL_SYSTEM;
if (is_privileged)
{
return false;
}
const auto proc = process::get_current_process();
const auto filename = proc.get_image_filename();
if (!string::equal(filename, "HogwartsLegacy"))
{
return false;
}
debug_log("CPUID call (%s): %llX - (EAX: %08X - ECX: %08X) - (EAX: %08X - EBX: %08X - ECX: %08X - EDX: %08X)\n",
filename,
guest_rip, rax, rcx, cpu_info[0], cpu_info[1], cpu_info[2], cpu_info[3]);
//debug_log("OVERHEAD\n");
return true;
}
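// CPUID exit handler. A kernel-mode CPUID with EAX=0x41414141 and
// ECX=0x42424242 is the magic knock disable_core uses to tear the
// hypervisor down. All other leaves are forwarded to the real CPU, with
// the hypervisor-present bit and the 'momo' vendor signature injected,
// and selected leaves spoofed for the targeted process (experimental).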
void vmx_handle_cpuid(vmx::guest_context& guest_context)
{
INT32 cpu_info[4]{0, 0, 0, 0};
const auto is_privileged = (read_vmx(VMCS_GUEST_CS_SELECTOR) &
SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK) == DPL_SYSTEM;
if (guest_context.vp_regs->Rax == 0x41414141 &&
guest_context.vp_regs->Rcx == 0x42424242 &&
is_privileged)
{
guest_context.exit_vm = true;
return;
}
__cpuidex(cpu_info, static_cast<int32_t>(guest_context.vp_regs->Rax),
static_cast<int32_t>(guest_context.vp_regs->Rcx));
bool should_zero = false;
if (!is_privileged)
{
should_zero = log_cpuid_call(guest_context.guest_rip, guest_context.vp_regs->Rax, guest_context.vp_regs->Rcx,
cpu_info);
}
const auto _rax = guest_context.vp_regs->Rax;
if (guest_context.vp_regs->Rax == 1)
{
cpu_info[2] |= HYPERV_HYPERVISOR_PRESENT_BIT;
}
else if (guest_context.vp_regs->Rax == HYPERV_CPUID_INTERFACE)
{
cpu_info[0] = 'momo';
}
if (should_zero)
{
// [MOMO] CPUID call(HogwartsLegacy) : 140D2451B - (EAX : 80000006 - ECX : 00000000) - (EAX : 00000000 - EBX : 00000000 - ECX : 01006040 - EDX : 00000000)
// [MOMO] CPUID call (HogwartsLegacy): 1405F4817 - (EAX: 00000004 - ECX: 00000000) - (EAX: 1C004121 - EBX: 01C0003F - ECX: 0000003F - EDX: 00000000)
bool allow_all = true;
// not sure if necessary
/*if (_rax == 0 && allow_all)
{
cpu_info[0] = 0x00000016;
cpu_info[1] = 0x756E6547;
cpu_info[2] = 0x6C65746E;
cpu_info[3] = 0x49656E69;
}
else if (_rax == 4 && allow_all)
{
cpu_info[0] = 0x00000000;
cpu_info[1] = 0x01C0003F;
cpu_info[2] = 0x0000003F;
cpu_info[3] = 0x00000000;
}
else if (_rax == 7 && allow_all)
{
cpu_info[0] = 0x1C004121;
cpu_info[1] = 0x029C6FBF;
cpu_info[2] = 0x40000000;
cpu_info[3] = (INT32)0xBC002E00;
}
else if (_rax == 0x80000000 && allow_all)
{
cpu_info[0] = (INT32)0x80000008;
cpu_info[1] = 0x00000000;
cpu_info[2] = 0x00000000;
cpu_info[3] = 0x00000000;
}
else if (_rax == 0x80000006 && allow_all)
{
cpu_info[0] = 0x00000000;
cpu_info[1] = 0x00000000;
cpu_info[2] = 0x01006040;
cpu_info[3] = 0x00000000;
}
// absolutely necessary v
else*/ if (_rax == 1 && allow_all)
{
cpu_info[0] = 0x000306A9;
cpu_info[1] = 0x02100800;
cpu_info[2] = 0x7FBAE3FF & (~0xC000000);
//cpu_info[0] = 0x000906EA;
//cpu_info[1] = 0x04100800;
//cpu_info[2] = 0x7FFAFBFF & (~0xC000000);
cpu_info[3] = (INT32)0xBFEBFBFF;
}
else if (_rax == 0x80000002)
{
cpu_info[0] = 0x65746E49;
cpu_info[1] = 0x2952286C;
cpu_info[2] = 0x726F4320;
cpu_info[3] = 0x4D542865;
}
else if (_rax == 0x80000003)
{
cpu_info[0] = 0x37692029;
cpu_info[1] = 0x3538382D;
cpu_info[2] = 0x43204830;
cpu_info[3] = 0x40205550;
}
else if (_rax == 0x80000004)
{
cpu_info[0] = 0x362E3220;
cpu_info[1] = 0x7A484730;
cpu_info[2] = 0x00000000;
cpu_info[3] = 0x00000000;
}
else if (false)
{
cpu_info[0] = 0;
cpu_info[1] = 0;
cpu_info[2] = 0;
cpu_info[3] = 0;
debug_log("Not zeroing!\n");
}
/* should_zero &= _rax == 1
|| _rax == 0x80000002
|| _rax == 0x80000003
|| _rax == 0x80000004;
*
if (should_zero)
{
cpu_info[0] = 0;
cpu_info[1] = 0;
cpu_info[2] = 0;
cpu_info[3] = 0;
}*/
}
guest_context.vp_regs->Rax = cpu_info[0];
guest_context.vp_regs->Rbx = cpu_info[1];
guest_context.vp_regs->Rcx = cpu_info[2];
guest_context.vp_regs->Rdx = cpu_info[3];
}
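// Forwards XSETBV from the guest: XCR number in ECX, value in EDX:EAX.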
void vmx_handle_xsetbv(const vmx::guest_context& guest_context)
{
_xsetbv(static_cast<uint32_t>(guest_context.vp_regs->Rcx),
guest_context.vp_regs->Rdx << 32 | guest_context.vp_regs->Rax);
}
void vmx_handle_vmx(vmx::guest_context& guest_context)
{
guest_context.guest_e_flags |= 0x1; // VM_FAIL_INVALID
__vmx_vmwrite(VMCS_GUEST_RFLAGS, guest_context.guest_e_flags);
}
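// Central exit dispatcher. Guest VMX instructions are failed with
// VM_FAIL_INVALID (CF set); the guest RIP is advanced past the exiting
// instruction unless a handler clears increment_rip.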
void vmx_dispatch_vm_exit(vmx::guest_context& guest_context, const vmx::state& vm_state)
{
switch (guest_context.exit_reason)
{
case VMX_EXIT_REASON_EXECUTE_CPUID:
vmx_handle_cpuid(guest_context);
break;
case VMX_EXIT_REASON_EXECUTE_INVD:
vmx_handle_invd();
break;
case VMX_EXIT_REASON_EXECUTE_XSETBV:
vmx_handle_xsetbv(guest_context);
break;
case VMX_EXIT_REASON_EXECUTE_VMCALL:
case VMX_EXIT_REASON_EXECUTE_VMCLEAR:
case VMX_EXIT_REASON_EXECUTE_VMLAUNCH:
case VMX_EXIT_REASON_EXECUTE_VMPTRLD:
case VMX_EXIT_REASON_EXECUTE_VMPTRST:
case VMX_EXIT_REASON_EXECUTE_VMREAD:
case VMX_EXIT_REASON_EXECUTE_VMRESUME:
case VMX_EXIT_REASON_EXECUTE_VMWRITE:
case VMX_EXIT_REASON_EXECUTE_VMXOFF:
case VMX_EXIT_REASON_EXECUTE_VMXON:
vmx_handle_vmx(guest_context);
break;
case VMX_EXIT_REASON_EPT_VIOLATION:
vm_state.ept->handle_violation(guest_context);
break;
case VMX_EXIT_REASON_EPT_MISCONFIGURATION:
vm_state.ept->handle_misconfiguration(guest_context);
break;
case VMX_EXIT_REASON_EXECUTE_RDTSC:
{
//debug_log("VM exit: VMX_EXIT_REASON_EXECUTE_RDTSC\n");
ULARGE_INTEGER tsc{};
tsc.QuadPart = __rdtsc();
guest_context.vp_regs->Rax = tsc.LowPart;
guest_context.vp_regs->Rdx = tsc.HighPart;
log_other_call(guest_context.guest_rip, true);
break;
}
case VMX_EXIT_REASON_EXECUTE_RDTSCP:
{
//debug_log("VM exit: VMX_EXIT_REASON_EXECUTE_RDTSCP\n");
uint32_t _rcx{};
ULARGE_INTEGER tsc{};
tsc.QuadPart = __rdtscp(&_rcx);
guest_context.vp_regs->Rax = tsc.LowPart;
guest_context.vp_regs->Rdx = tsc.HighPart;
guest_context.vp_regs->Rcx = _rcx;
log_other_call(guest_context.guest_rip, false);
break;
}
default:
//debug_log("Unknown VM exit: %X\n",(uint32_t) guest_context.exit_reason);
break;
}
if (guest_context.increment_rip)
{
guest_context.guest_rip += read_vmx(VMCS_VMEXIT_INSTRUCTION_LENGTH);
__vmx_vmwrite(VMCS_GUEST_RIP, guest_context.guest_rip);
}
}
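// Host-side entry point reached from the vm_exit stub installed as
// VMCS_HOST_RIP. Builds a guest_context from the VMCS, dispatches the
// exit, then either resumes the guest via vmresume or, when exit_vm is
// set, restores the guest's descriptor tables, CR3 and register state and
// continues executing un-virtualized.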
extern "C" [[ noreturn ]] void vm_exit_handler(CONTEXT* context)
{
auto* vm_state = resolve_vm_state_from_context(*context);
vmx::guest_context guest_context{};
guest_context.guest_e_flags = read_vmx(VMCS_GUEST_RFLAGS);
guest_context.guest_rip = read_vmx(VMCS_GUEST_RIP);
guest_context.guest_rsp = read_vmx(VMCS_GUEST_RSP);
guest_context.guest_physical_address = read_vmx(VMCS_GUEST_PHYSICAL_ADDRESS);
guest_context.exit_reason = read_vmx(VMCS_EXIT_REASON) & 0xFFFF;
guest_context.exit_qualification = read_vmx(VMCS_EXIT_QUALIFICATION);
guest_context.vp_regs = context;
guest_context.exit_vm = false;
guest_context.increment_rip = true;
vmx_dispatch_vm_exit(guest_context, *vm_state);
if (guest_context.exit_vm)
{
context->Rcx = 0x43434343;
context->Rsp = guest_context.guest_rsp;
context->Rip = guest_context.guest_rip;
context->EFlags = static_cast<uint32_t>(guest_context.guest_e_flags);
restore_descriptor_tables(vm_state->launch_context);
__writecr3(read_vmx(VMCS_GUEST_CR3));
__vmx_off();
}
else
{
context->Rip = reinterpret_cast<uint64_t>(resume_vmx);
}
restore_context(context);
}
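// Populates the current VMCS: execution/exit/entry controls adjusted
// through the capability MSRs, guest and host segment state derived from
// the captured registers, and RSP/RIP for both worlds. The guest first
// (re-)enters at the vm_launch stub, on the same per-core stack the host
// handler uses.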
void setup_vmcs_for_cpu(vmx::state& vm_state)
{
auto* launch_context = &vm_state.launch_context;
auto* state = &launch_context->special_registers;
auto* context = &launch_context->context_frame;
__vmx_vmwrite(VMCS_GUEST_VMCS_LINK_POINTER, ~0ULL);
if (launch_context->ept_controls.flags != 0)
{
const auto vmx_eptp = vm_state.ept->get_ept_pointer();
__vmx_vmwrite(VMCS_CTRL_EPT_POINTER, vmx_eptp.flags);
__vmx_vmwrite(VMCS_CTRL_VIRTUAL_PROCESSOR_IDENTIFIER, 1);
}
__vmx_vmwrite(VMCS_CTRL_MSR_BITMAP_ADDRESS, launch_context->msr_bitmap_physical_address);
auto ept_controls = launch_context->ept_controls;
ept_controls.enable_rdtscp = 1;
ept_controls.enable_invpcid = 1;
ept_controls.enable_xsaves = 1;
__vmx_vmwrite(VMCS_CTRL_SECONDARY_PROCESSOR_BASED_VM_EXECUTION_CONTROLS,
adjust_msr(launch_context->msr_data[11], ept_controls.flags));
__vmx_vmwrite(VMCS_CTRL_PIN_BASED_VM_EXECUTION_CONTROLS, adjust_msr(launch_context->msr_data[13], 0));
ia32_vmx_procbased_ctls_register procbased_ctls_register{};
procbased_ctls_register.activate_secondary_controls = 1;
procbased_ctls_register.use_msr_bitmaps = 1;
procbased_ctls_register.rdtsc_exiting = 0;
__vmx_vmwrite(VMCS_CTRL_PROCESSOR_BASED_VM_EXECUTION_CONTROLS,
adjust_msr(launch_context->msr_data[14],
procbased_ctls_register.flags));
ia32_vmx_exit_ctls_register exit_ctls_register{};
exit_ctls_register.host_address_space_size = 1;
__vmx_vmwrite(VMCS_CTRL_VMEXIT_CONTROLS,
adjust_msr(launch_context->msr_data[15],
exit_ctls_register.flags));
ia32_vmx_entry_ctls_register entry_ctls_register{};
entry_ctls_register.ia32e_mode_guest = 1;
__vmx_vmwrite(VMCS_CTRL_VMENTRY_CONTROLS,
adjust_msr(launch_context->msr_data[16],
entry_ctls_register.flags));
vmx::gdt_entry gdt_entry{};
gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegCs);
__vmx_vmwrite(VMCS_GUEST_CS_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_CS_LIMIT, gdt_entry.limit);
__vmx_vmwrite(VMCS_GUEST_CS_ACCESS_RIGHTS, gdt_entry.access_rights.flags);
__vmx_vmwrite(VMCS_GUEST_CS_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_HOST_CS_SELECTOR, context->SegCs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK);
gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegSs);
__vmx_vmwrite(VMCS_GUEST_SS_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_SS_LIMIT, gdt_entry.limit);
__vmx_vmwrite(VMCS_GUEST_SS_ACCESS_RIGHTS, gdt_entry.access_rights.flags);
__vmx_vmwrite(VMCS_GUEST_SS_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_HOST_SS_SELECTOR, context->SegSs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK);
gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegDs);
__vmx_vmwrite(VMCS_GUEST_DS_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_DS_LIMIT, gdt_entry.limit);
__vmx_vmwrite(VMCS_GUEST_DS_ACCESS_RIGHTS, gdt_entry.access_rights.flags);
__vmx_vmwrite(VMCS_GUEST_DS_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_HOST_DS_SELECTOR, context->SegDs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK);
gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegEs);
__vmx_vmwrite(VMCS_GUEST_ES_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_ES_LIMIT, gdt_entry.limit);
__vmx_vmwrite(VMCS_GUEST_ES_ACCESS_RIGHTS, gdt_entry.access_rights.flags);
__vmx_vmwrite(VMCS_GUEST_ES_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_HOST_ES_SELECTOR, context->SegEs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK);
gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegFs);
__vmx_vmwrite(VMCS_GUEST_FS_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_FS_LIMIT, gdt_entry.limit);
__vmx_vmwrite(VMCS_GUEST_FS_ACCESS_RIGHTS, gdt_entry.access_rights.flags);
__vmx_vmwrite(VMCS_GUEST_FS_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_HOST_FS_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_HOST_FS_SELECTOR, context->SegFs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK);
gdt_entry = convert_gdt_entry(state->gdtr.base_address, context->SegGs);
__vmx_vmwrite(VMCS_GUEST_GS_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_GS_LIMIT, gdt_entry.limit);
__vmx_vmwrite(VMCS_GUEST_GS_ACCESS_RIGHTS, gdt_entry.access_rights.flags);
__vmx_vmwrite(VMCS_GUEST_GS_BASE, state->msr_gs_base);
__vmx_vmwrite(VMCS_HOST_GS_BASE, state->msr_gs_base);
__vmx_vmwrite(VMCS_HOST_GS_SELECTOR, context->SegGs & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK);
gdt_entry = convert_gdt_entry(state->gdtr.base_address, state->tr);
__vmx_vmwrite(VMCS_GUEST_TR_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_TR_LIMIT, gdt_entry.limit);
__vmx_vmwrite(VMCS_GUEST_TR_ACCESS_RIGHTS, gdt_entry.access_rights.flags);
__vmx_vmwrite(VMCS_GUEST_TR_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_HOST_TR_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_HOST_TR_SELECTOR, state->tr & ~SEGMENT_ACCESS_RIGHTS_DESCRIPTOR_PRIVILEGE_LEVEL_MASK);
gdt_entry = convert_gdt_entry(state->gdtr.base_address, state->ldtr);
__vmx_vmwrite(VMCS_GUEST_LDTR_SELECTOR, gdt_entry.selector.flags);
__vmx_vmwrite(VMCS_GUEST_LDTR_LIMIT, gdt_entry.limit);
__vmx_vmwrite(VMCS_GUEST_LDTR_ACCESS_RIGHTS, gdt_entry.access_rights.flags);
__vmx_vmwrite(VMCS_GUEST_LDTR_BASE, gdt_entry.base);
__vmx_vmwrite(VMCS_GUEST_GDTR_BASE, state->gdtr.base_address);
__vmx_vmwrite(VMCS_GUEST_GDTR_LIMIT, state->gdtr.limit);
__vmx_vmwrite(VMCS_HOST_GDTR_BASE, state->gdtr.base_address);
__vmx_vmwrite(VMCS_GUEST_IDTR_BASE, state->idtr.base_address);
__vmx_vmwrite(VMCS_GUEST_IDTR_LIMIT, state->idtr.limit);
__vmx_vmwrite(VMCS_HOST_IDTR_BASE, state->idtr.base_address);
__vmx_vmwrite(VMCS_CTRL_CR0_READ_SHADOW, state->cr0);
__vmx_vmwrite(VMCS_HOST_CR0, state->cr0);
__vmx_vmwrite(VMCS_GUEST_CR0, state->cr0);
__vmx_vmwrite(VMCS_HOST_CR3, launch_context->system_directory_table_base);
__vmx_vmwrite(VMCS_GUEST_CR3, state->cr3);
__vmx_vmwrite(VMCS_HOST_CR4, state->cr4);
__vmx_vmwrite(VMCS_GUEST_CR4, state->cr4);
__vmx_vmwrite(VMCS_CTRL_CR4_READ_SHADOW, state->cr4);
__vmx_vmwrite(VMCS_GUEST_DEBUGCTL, state->debug_control);
__vmx_vmwrite(VMCS_GUEST_DR7, state->kernel_dr7);
const auto stack_pointer = reinterpret_cast<uintptr_t>(vm_state.stack_buffer) + KERNEL_STACK_SIZE - sizeof(CONTEXT);
__vmx_vmwrite(VMCS_GUEST_RSP, stack_pointer);
__vmx_vmwrite(VMCS_GUEST_RIP, reinterpret_cast<uintptr_t>(vm_launch));
__vmx_vmwrite(VMCS_GUEST_RFLAGS, context->EFlags);
C_ASSERT((KERNEL_STACK_SIZE - sizeof(CONTEXT)) % 16 == 0);
__vmx_vmwrite(VMCS_HOST_RSP, stack_pointer);
__vmx_vmwrite(VMCS_HOST_RIP, reinterpret_cast<uintptr_t>(vm_exit));
}
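// Caches the IA32_VMX_* capability MSR range: msr_data[i] holds the MSR
// at IA32_VMX_BASIC + i, which is how the magic indices used above
// (0, 6..9, 11..16) are to be read.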
void initialize_msrs(vmx::launch_context& launch_context)
{
constexpr auto msr_count = sizeof(launch_context.msr_data) / sizeof(launch_context.msr_data[0]);
for (auto i = 0u; i < msr_count; ++i)
{
launch_context.msr_data[i].QuadPart = __readmsr(IA32_VMX_BASIC + i);
}
}
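// One-shot per-core launch path. On success, vmlaunch never returns here;
// execution instead resumes in vm_launch_handler, which restores the
// captured context with launched == true.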
[[ noreturn ]] void launch_hypervisor(vmx::state& vm_state)
{
initialize_msrs(vm_state.launch_context);
//vm_state.ept->initialize();
enter_root_mode_on_cpu(vm_state);
setup_vmcs_for_cpu(vm_state);
auto error_code = launch_vmx();
throw std::runtime_error(string::va("Failed to launch vmx: %X", error_code));
}
void hypervisor::enable_core(const uint64_t system_directory_table_base)
{
debug_log("Enabling hypervisor on core %d\n", thread::get_processor_index());
auto* vm_state = this->get_current_vm_state();
if (!is_vmx_supported())
{
throw std::runtime_error("VMX not supported on this core");
}
if (!is_vmx_available())
{
throw std::runtime_error("VMX not available on this core");
}
vm_state->launch_context.launched = false;
vm_state->launch_context.system_directory_table_base = system_directory_table_base;
// Must be inlined here, otherwise the stack is broken
capture_cpu_context(vm_state->launch_context);
if (!vm_state->launch_context.launched)
{
launch_hypervisor(*vm_state);
}
if (!is_hypervisor_present())
{
throw std::runtime_error("Hypervisor is not present");
}
}
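// Issues the magic CPUID knock handled by vmx_handle_cpuid. If the
// hypervisor is still present afterwards, something went badly wrong and
// a bug check is preferable to running half-virtualized.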
void hypervisor::disable_core()
{
debug_log("Disabling hypervisor on core %d\n", thread::get_processor_index());
int32_t cpu_info[4]{0};
__cpuidex(cpu_info, 0x41414141, 0x42424242);
if (this->is_enabled())
{
debug_log("Shutdown for core %d failed. Issuing kernel panic!\n", thread::get_processor_index());
KeBugCheckEx(DRIVER_VIOLATION, 1, 0, 0, 0);
}
}
void hypervisor::allocate_vm_states()
{
if (!this->ept_)
{
this->ept_ = memory::allocate_aligned_object<vmx::ept>();
if (!this->ept_)
{
throw std::runtime_error("Failed to allocate ept object");
}
}
if (this->vm_states_)
{
throw std::runtime_error("VM states are still in use");
}
// As Windows technically supports CPU hot-plugging, keep track of the allocation count.
// However, virtualizing hot-plugged CPUs is not supported here.
this->vm_state_count_ = thread::get_processor_count();
this->vm_states_ = new vmx::state*[this->vm_state_count_]{};
for (auto i = 0u; i < this->vm_state_count_; ++i)
{
this->vm_states_[i] = memory::allocate_aligned_object<vmx::state>();
if (!this->vm_states_[i])
{
throw std::runtime_error("Failed to allocate VM state entries");
}
this->vm_states_[i]->ept = this->ept_;
}
}
void hypervisor::free_vm_states()
{
if (this->vm_states_)
{
for (auto i = 0u; i < this->vm_state_count_; ++i)
{
memory::free_aligned_object(this->vm_states_[i]);
}
delete[] this->vm_states_;
this->vm_states_ = nullptr;
this->vm_state_count_ = 0;
}
if (this->ept_)
{
memory::free_aligned_object(this->ept_);
this->ept_ = nullptr;
}
}
void hypervisor::invalidate_cores() const
{
thread::dispatch_on_all_cores([&]
{
const auto* vm_state = this->get_current_vm_state();
if (vm_state && this->is_enabled())
{
vm_state->ept->invalidate();
}
});
}
vmx::state* hypervisor::get_current_vm_state() const
{
const auto current_core = thread::get_processor_index();
if (!this->vm_states_ || current_core >= this->vm_state_count_)
{
return nullptr;
}
return this->vm_states_[current_core];
}