EM : Execution Monitor / Manager 负责运行VM、调度正确的执行类型(Raw-mode, Hardware Assisted, Recompiled or Interpreted),并保持CPU状态同步。EMR3ExecuteVM函数是虚拟机的“主循环”,而每个执行模式具有不同的内部循环(emR3RawExecute、emR3HmExecute和emR3RemExecute)。
本篇先介绍初始化/结束 APIs和Execution loop
14.1 Execution Manager Init/Term函数
// Initializes the Execution Manager: reads the "EM" configuration node, copies
// the settings into every VCPU, registers the "em" saved-state unit and sets the
// initial per-VCPU state.
// NOTE(review): `rc` is assigned but never declared, checked or returned in this
// excerpt, and the function has no return statement; presumably those lines were
// elided - confirm against the full source.
VMMR3_INT_DECL(int) EMR3Init(PVM pVM)
{
PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
PCFGMNODE pCfgEM = CFGMR3GetChild(pCfgRoot, "EM");
// "IemExecutesAll" (default false) is stored in pVM->em.s.fIemExecutesAll.
CFGMR3QueryBoolDef(pCfgEM, "IemExecutesAll", &pVM->em.s.fIemExecutesAll, false);
// "TripleFaultReset" (default false). fGuruOnTripleFault is the negation of it:
// presumably a triple fault then raises a guru meditation unless the option is
// set, in which case the VM is reset instead - confirm.
bool fEnabled;
CFGMR3QueryBoolDef(pCfgEM, "TripleFaultReset", &fEnabled, false);
pVM->em.s.fGuruOnTripleFault = !fEnabled;
// "ExitOptimizationEnabled", default true.
bool fExitOptimizationEnabled = true;
rc = CFGMR3QueryBoolDef(pCfgEM, "ExitOptimizationEnabled", &fExitOptimizationEnabled, true);
// "ExitOptimizationEnabledR0", default true; only effective when the general
// exit optimization switch is on as well.
bool fExitOptimizationEnabledR0 = true;
rc = CFGMR3QueryBoolDef(pCfgEM, "ExitOptimizationEnabledR0", &fExitOptimizationEnabledR0, true);
fExitOptimizationEnabledR0 &= fExitOptimizationEnabled;
// "ExitOptimizationEnabledR0PreemptDisabled", queried with default false (the
// local initializer `true` is overwritten by the query); only effective when
// the R0 switch is on as well.
bool fExitOptimizationEnabledR0PreemptDisabled = true;
rc = CFGMR3QueryBoolDef(pCfgEM, "ExitOptimizationEnabledR0PreemptDisabled", &fExitOptimizationEnabledR0PreemptDisabled, false);
fExitOptimizationEnabledR0PreemptDisabled &= fExitOptimizationEnabledR0;
// "HistoryExecMaxInstructions", default 8192.
uint16_t cHistoryExecMaxInstructions = 8192;
rc = CFGMR3QueryU16Def(pCfgEM, "HistoryExecMaxInstructions", &cHistoryExecMaxInstructions, cHistoryExecMaxInstructions);
// "HistoryProbeMaxInstructionsWithoutExit", default 24, or 32 when NEM is enabled.
// Presumably an instruction limit used by IEMExecForExits - confirm.
uint16_t cHistoryProbeMaxInstructionsWithoutExit = 24;
if (VM_IS_NEM_ENABLED(pVM))
cHistoryProbeMaxInstructionsWithoutExit = 32;
rc = CFGMR3QueryU16Def(pCfgEM, "HistoryProbeMaxInstructionsWithoutExit", &cHistoryProbeMaxInstructionsWithoutExit,
cHistoryProbeMaxInstructionsWithoutExit);
// Minimum probe instruction count: defaults to (max + 1) * 3, saturated at
// 0xffff so the product still fits into a uint16_t.
// NOTE(review): the configuration key is spelled "HistoryProbMinInstructions"
// ("Prob", not "Probe") - confirm this is intentional.
uint16_t cHistoryProbeMinInstructions = cHistoryProbeMaxInstructionsWithoutExit < 0x5554
? (cHistoryProbeMaxInstructionsWithoutExit + 1) * 3 : 0xffff;
rc = CFGMR3QueryU16Def(pCfgEM, "HistoryProbMinInstructions", &cHistoryProbeMinInstructions,
cHistoryProbeMinInstructions);
// Copy the settings into the EM state of every VCPU.
for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
{
PVMCPU pVCpu = pVM->apCpusR3[idCpu];
pVCpu->em.s.fExitOptimizationEnabled = fExitOptimizationEnabled;
pVCpu->em.s.fExitOptimizationEnabledR0 = fExitOptimizationEnabledR0;
pVCpu->em.s.fExitOptimizationEnabledR0PreemptDisabled = fExitOptimizationEnabledR0PreemptDisabled;
pVCpu->em.s.cHistoryExecMaxInstructions = cHistoryExecMaxInstructions;
pVCpu->em.s.cHistoryProbeMinInstructions = cHistoryProbeMinInstructions;
pVCpu->em.s.cHistoryProbeMaxInstructionsWithoutExit = cHistoryProbeMaxInstructionsWithoutExit;
}
// Register the "em" saved-state unit; only emR3Save and emR3Load are supplied,
// every other callback slot is NULL.
rc = SSMR3RegisterInternal(pVM, "em", 0, EM_SAVED_STATE_VERSION, 16,
NULL, NULL, NULL,
NULL, emR3Save, NULL,
NULL, emR3Load, NULL);
// Initialize the EM state of every VCPU.
for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
{
PVMCPU pVCpu = pVM->apCpusR3[idCpu];
// VCPU 0 starts in EMSTATE_NONE; all other VCPUs start in EMSTATE_WAIT_SIPI.
pVCpu->em.s.enmState = idCpu == 0 ? EMSTATE_NONE : EMSTATE_WAIT_SIPI;
pVCpu->em.s.enmPrevState = EMSTATE_NONE;
pVCpu->em.s.u64TimeSliceStart = 0; /* paranoia */
pVCpu->em.s.idxContinueExitRec = UINT16_MAX;
}
// emR3InitDbg is not shown here; presumably it registers debugger info such as
// the VM-exit history - confirm.
emR3InitDbg(pVM);
}
EMR3InitCompleted
// Init-completed notification for EM. Nothing is done here: both arguments are
// ignored and the call always reports success.
VMMR3_INT_DECL(int) EMR3InitCompleted(PVM pVM, VMINITCOMPLETED enmWhat)
{
    (void)pVM;
    (void)enmWhat;
    return VINF_SUCCESS;
}
EMR3Relocate
// Relocation hook for EM. Intentionally empty: the argument is ignored.
VMMR3_INT_DECL(void) EMR3Relocate(PVM pVM)
{
    (void)pVM;
}
EMR3ResetCpu
// Resets the EM state of a single VCPU: clears the pending UNHALT force-flag and,
// if the VCPU was halted, puts it back into its start-up state.
VMMR3_INT_DECL(void) EMR3ResetCpu(PVMCPU pVCpu)
{
    VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_UNHALT);

    // Only a halted VCPU gets its state rewritten.
    if (pVCpu->em.s.enmState != EMSTATE_HALTED)
        return;

    // Same start-up states as at init: VCPU 0 -> NONE, the others -> WAIT_SIPI.
    if (pVCpu->idCpu == 0)
        pVCpu->em.s.enmState = EMSTATE_NONE;
    else
        pVCpu->em.s.enmState = EMSTATE_WAIT_SIPI;
}
EMR3Reset
// Resets the EM state of the whole VM by calling EMR3ResetCpu() on every VCPU,
// in ascending CPU-id order.
VMMR3_INT_DECL(void) EMR3Reset(PVM pVM)
{
    VMCPUID iCpu = 0;
    while (iCpu < pVM->cCpus)
    {
        EMR3ResetCpu(pVM->apCpusR3[iCpu]);
        iCpu++;
    }
}
EMR3Term
// Termination hook for EM. Nothing is done here: the argument is ignored and the
// call always reports success.
VMMR3_INT_DECL(int) EMR3Term(PVM pVM)
{
    (void)pVM;
    return VINF_SUCCESS;
}
14.2 Emulation Thread 的循环函数
Execution 系列函数:
EMR3ExecuteVM:
总入口,从VM.cpp里调入
// Main entry point of the execution loop for one VCPU.
// Each iteration: run pending forced actions, map the resulting status code to
// an EM state, then dispatch on that state (inner execution loop, wait, debug,
// suspend or bail out).
// NOTE(review): `fFFDone` is used but never declared in this excerpt and the
// setjmp() result is not examined; presumably elided - confirm against the
// full source.
VMMR3_INT_DECL(int) EMR3ExecuteVM(PVM pVM, PVMCPU pVCpu)
{
// Set the long-jump target (FatalLongJump).
int rc = setjmp(pVCpu->em.s.u.FatalLongJump);
// Tell TM this VCPU is resuming; presumably starts VM execution time accounting.
TMR3NotifyResume(pVM, pVCpu);
// Execution loop.
for (;;)
{
// Run pending forced actions first, unless the inner loop already handled
// them (fFFDone), rc is a failure, or rc is VINF_EM_TERMINATE / VINF_EM_OFF.
// VMCPU_FF_UNHALT alone does not trigger this.
if ( !fFFDone
&& RT_SUCCESS(rc)
&& rc != VINF_EM_TERMINATE
&& rc != VINF_EM_OFF
&& ( VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_REM_MASK)
|| VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_ALL_REM_MASK & ~VMCPU_FF_UNHALT)))
{
rc = emR3ForcedActions(pVM, pVCpu, rc);
VBOXVMM_EM_FF_ALL_RET(pVCpu, rc);
}
EMSTATE const enmOldState = pVCpu->em.s.enmState;
// Map the status code to a new EM state.
switch (rc)
{
case VINF_SUCCESS:
break;
case VINF_EM_RESCHEDULE_RAW:
if (VM_IS_RAW_MODE_ENABLED(pVM))
{
// Raw-mode is enabled: switch to it.
pVCpu->em.s.enmState = EMSTATE_RAW;
}
else
{
pVCpu->em.s.enmState = EMSTATE_NONE;
}
break;
// Reschedule to HM, or to NEM when HM is not enabled.
case VINF_EM_RESCHEDULE_HM:
if (VM_IS_HM_ENABLED(pVM))
{
// HM is available.
pVCpu->em.s.enmState = EMSTATE_HM;
}
else if (VM_IS_NEM_ENABLED(pVM))
{
// NEM is available.
pVCpu->em.s.enmState = EMSTATE_NEM;
}
else
{
// Neither is enabled: error, handled by the state switch below.
pVCpu->em.s.enmState = EMSTATE_NONE;
}
break;
}//end of switch
// The state to execute in this iteration.
EMSTATE const enmNewState = pVCpu->em.s.enmState;
// On a state change, the wake-up markers left over from a halt may need
// to be cleared.
if (enmOldState != enmNewState)
{
// Leaving EMSTATE_HALTED for an executing or debugging state while an MWAIT
// is active or an UNHALT is pending.
if ( enmOldState == EMSTATE_HALTED
&& ( (pVCpu->em.s.MWait.fWait & EMMWAIT_FLAG_ACTIVE)
|| VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_UNHALT))
&& ( enmNewState == EMSTATE_RAW
|| enmNewState == EMSTATE_HM
|| enmNewState == EMSTATE_NEM
|| enmNewState == EMSTATE_REM
|| enmNewState == EMSTATE_IEM_THEN_REM
|| enmNewState == EMSTATE_DEBUG_GUEST_RAW
|| enmNewState == EMSTATE_DEBUG_GUEST_HM
|| enmNewState == EMSTATE_DEBUG_GUEST_NEM
|| enmNewState == EMSTATE_DEBUG_GUEST_IEM
|| enmNewState == EMSTATE_DEBUG_GUEST_REM) )
{
if (pVCpu->em.s.MWait.fWait & EMMWAIT_FLAG_ACTIVE)
{
// Clear the MWAIT active and break-on-IRQ-with-IF=0 flags.
pVCpu->em.s.MWait.fWait &= ~(EMMWAIT_FLAG_ACTIVE | EMMWAIT_FLAG_BREAKIRQIF0);
}
if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_UNHALT))
{
// Clear the UNHALT force-flag; guest code is about to run.
VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_UNHALT);
}
}
}
switch (enmNewState)
{
// EMSTATE_RAW is rejected with an internal error.
case EMSTATE_RAW:
rc = VERR_EM_INTERNAL_ERROR;
break;
// Call the inner execution loop matching the state.
case EMSTATE_HM:
rc = emR3HmExecute(pVM, pVCpu, &fFFDone);
break;
case EMSTATE_NEM:
rc = VBOXSTRICTRC_TODO(emR3NemExecute(pVM, pVCpu, &fFFDone));
break;
case EMSTATE_REM:
rc = emR3RemExecute(pVM, pVCpu, &fFFDone);
break;
// IEM: run guest code through IEMExecLots.
case EMSTATE_IEM:
{
uint32_t cInstructions = 0;
// At most 4096 instructions, poll rate 2047; cInstructions is the
// instruction-count output argument (unused here).
rc = VBOXSTRICTRC_TODO(IEMExecLots(pVCpu, 4096 /*cMaxInstructions*/, 2047 /*cPollRate*/, &cInstructions));
fFFDone = false;
break;
}
// IEM first, then REM (see emR3ExecuteIemThenRem).
case EMSTATE_IEM_THEN_REM:
{
rc = VBOXSTRICTRC_TODO(emR3ExecuteIemThenRem(pVM, pVCpu, &fFFDone));
break;
}
// NOTE(review): the next two labels are VINF_* status codes although this
// switch is over enmNewState (an EMSTATE); presumably they belong to the
// switch (rc) above - confirm against the full source.
case VINF_EM_RESUME:
// If the previous state was WAIT_SIPI or HALTED, go back to it.
if ( pVCpu->em.s.enmPrevState == EMSTATE_WAIT_SIPI
|| pVCpu->em.s.enmPrevState == EMSTATE_HALTED)
{
pVCpu->em.s.enmState = pVCpu->em.s.enmPrevState;
break;
}
// Otherwise fall through to VINF_EM_RESCHEDULE and get rescheduled.
case VINF_EM_RESCHEDULE:
{
// Let emR3Reschedule pick the next state from the current context.
EMSTATE enmState = emR3Reschedule(pVM, pVCpu);
// Entering EMSTATE_IEM_THEN_REM from another state: restart its instruction counter.
if (pVCpu->em.s.enmState != enmState && enmState == EMSTATE_IEM_THEN_REM)
pVCpu->em.s.cIemThenRemInstructions = 0;
pVCpu->em.s.enmState = enmState;
break;
}
// Waiting states: waiting for a SIPI, or halted.
case EMSTATE_WAIT_SIPI:
case EMSTATE_HALTED:
// A trap is already pending: reschedule so the guest can run.
if (TRPMHasTrap(pVCpu))
rc = VINF_EM_RESCHEDULE;
// MWAIT is active with EMMWAIT_FLAG_BREAKIRQIF0 set: wait without
// ignoring interrupts, whatever the guest's IF flag is.
else if ( (pVCpu->em.s.MWait.fWait & (EMMWAIT_FLAG_ACTIVE | EMMWAIT_FLAG_BREAKIRQIF0))
== (EMMWAIT_FLAG_ACTIVE | EMMWAIT_FLAG_BREAKIRQIF0))
{
// Block until something happens.
rc = VMR3WaitHalted(pVM, pVCpu, false /*fIgnoreInterrupts*/);
if (rc == VINF_SUCCESS)
{
// Update pending APIC interrupts first, then reschedule if any
// interrupt / NMI / SMI / UNHALT force-flag is set.
if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
APICUpdatePendingInterrupts(pVCpu);
if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC
| VMCPU_FF_INTERRUPT_NESTED_GUEST
| VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI | VMCPU_FF_UNHALT))
{
rc = VINF_EM_RESCHEDULE;
}
}
}
else
{
// Block until something happens; interrupts are ignored when the
// guest's EFLAGS.IF is clear.
rc = VMR3WaitHalted(pVM, pVCpu, !(CPUMGetGuestEFlags(pVCpu) & X86_EFL_IF));
// Woken up with an NMI, SMI or UNHALT pending: reschedule.
if ( rc == VINF_SUCCESS
&& VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI | VMCPU_FF_UNHALT))
{
rc = VINF_EM_RESCHEDULE;
}
}
break;
case EMSTATE_SUSPENDED:
// Tell TM this VCPU stopped running.
TMR3NotifySuspend(pVM, pVCpu);
// Return to the caller.
return VINF_EM_SUSPEND;
// Guest debugging: run emR3Debug, bracketed by TM suspend/resume notifications.
case EMSTATE_DEBUG_GUEST_RAW:
case EMSTATE_DEBUG_GUEST_HM:
case EMSTATE_DEBUG_GUEST_NEM:
case EMSTATE_DEBUG_GUEST_IEM:
case EMSTATE_DEBUG_GUEST_REM:
TMR3NotifySuspend(pVM, pVCpu);
rc = VBOXSTRICTRC_TODO(emR3Debug(pVM, pVCpu, rc));
TMR3NotifyResume(pVM, pVCpu);
break;
...
// No valid state, terminating, or anything not handled above: enter
// guru meditation and return an internal error to the caller.
case EMSTATE_NONE:
case EMSTATE_TERMINATING:
default:
pVCpu->em.s.enmState = EMSTATE_GURU_MEDITATION;
TMR3NotifySuspend(pVM, pVCpu);
return VERR_EM_INTERNAL_ERROR;
}//end of switch
}//end of for
}
emR3HmExecute
HM模式下执行GuestOS代码
// Inner execution loop for hardware-assisted (HM) mode.
//
// Repeatedly runs guest code through VMMR3HmRunGC() until a status code or a
// forced action requires leaving the loop.
//
// Returns the status code that ended the loop.
// pVM       - the VM.
// pVCpu     - the virtual CPU to run.
// pfFFDone  - set to true when the loop is left because emR3ForcedActions()
//             returned something other than VINF_SUCCESS / VINF_EM_RESCHEDULE_HM.
//             Not written on any other exit path.
int emR3HmExecute(PVM pVM, PVMCPU pVCpu, bool *pfFFDone)
{
    // Every path through the loop assigns rc before it is read; the initializer
    // only avoids an indeterminate value.
    int rc = VINF_SUCCESS;

    for (;;)
    {
        // Does the guest context require a different execution mode?
        if (HMR3IsRescheduleRequired(pVM, &pVCpu->cpum.GstCtx))
        {
            rc = VINF_EM_RESCHEDULE;
            break;
        }

        // High-priority forced actions that must run before guest code.
        if (   VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_PRE_RAW_MASK)
            || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_PRE_RAW_MASK))
        {
            rc = emR3HmForcedActions(pVM, pVCpu);
            if (rc != VINF_SUCCESS)
                break;
        }

        if (RT_LIKELY(emR3IsExecutionAllowed(pVM, pVCpu)))
        {
            // Run guest code through the VMM.
            rc = VMMR3HmRunGC(pVM, pVCpu);
        }
        else
        {
            // Execution is currently not allowed: sleep 5 ms and try again.
            RTThreadSleep(5);
            rc = VINF_SUCCESS;
        }

        // EM status codes are handed straight back to the caller.
        if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
            break;

        // Everything else goes through the HM status code handler.
        rc = emR3HmHandleRC(pVM, pVCpu, rc);
        if (rc != VINF_SUCCESS)
            break;

        // Process whatever forced actions are pending now.
        if (   VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK)
            || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_ALL_MASK))
        {
            rc = emR3ForcedActions(pVM, pVCpu, rc);
            if (   rc != VINF_SUCCESS
                && rc != VINF_EM_RESCHEDULE_HM)
            {
                // The forced actions produced a status that must leave the loop;
                // tell the caller they have already been processed.
                *pfFFDone = true;
                break;
            }
        }
    }

    // The original excerpt fell off the end of this non-void function even though
    // the caller uses the result.
    return rc;
}
emR3RemExecute
// Inner execution loop for the REM state; guest code is executed by IEMExecLots().
//
// Returns the status code that ended the loop, or VINF_EM_RESCHEDULE when the
// periodic check finds that another execution mode can take over.
// pVM       - the VM.
// pVCpu     - the virtual CPU to run.
// pfFFDone  - set to true when the loop is left because emR3ForcedActions()
//             returned something other than VINF_SUCCESS / VINF_EM_RESCHEDULE_REM.
//             Not written on any other exit path.
static int emR3RemExecute(PVM pVM, PVMCPU pVCpu, bool *pfFFDone)
{
    // The original excerpt used rc and cLoops without declaring them.
    // rc is assigned at the top of every iteration before it is read.
    int      rc     = VINF_SUCCESS;
    // Iteration counter driving the every-8th-iteration reschedule check below.
    uint32_t cLoops = 0;

    for (;;)
    {
        if (RT_LIKELY(emR3IsExecutionAllowed(pVM, pVCpu)))
        {
            // Execute up to 8192 instructions.
            rc = VBOXSTRICTRC_TODO(IEMExecLots(pVCpu, 8192 /*cMaxInstructions*/, 4095 /*cPollRate*/, NULL /*pcInstructions*/));
        }
        else
        {
            // Execution is currently not allowed: sleep 5 ms and try again.
            RTThreadSleep(5);
            rc = VINF_SUCCESS;
        }

        // High-priority forced actions that must run right after guest code.
        if (   VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_POST_MASK)
            || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_POST_MASK))
            rc = VBOXSTRICTRC_TODO(emR3HighPriorityPostForcedActions(pVM, pVCpu, rc));

        if (rc != VINF_SUCCESS)
        {
            // EM status codes are handed straight back to the caller.
            if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
                break;

            // Anything except "interrupted by a forced action" ends the loop.
            if (rc != VINF_REM_INTERRUPED_FF)
            {
                // IEM could not handle the instruction: see whether another
                // execution mode can.
                if (   rc == VERR_IEM_ASPECT_NOT_IMPLEMENTED
                    || rc == VERR_IEM_INSTR_NOT_IMPLEMENTED)
                {
                    EMSTATE enmNewState = emR3Reschedule(pVM, pVCpu);
                    if (enmNewState != EMSTATE_REM && enmNewState != EMSTATE_IEM_THEN_REM)
                    {
                        rc = VINF_EM_RESCHEDULE;
                        break;
                    }
                }
                // No other mode available (or a different error): return it.
                AssertMsg(rc == VERR_REM_TOO_MANY_TRAPS, ("Unknown GC return code: %Rra\n", rc));
                break;
            }
        }

        // Process pending forced actions.
        if (   VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_REM_MASK)
            || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_ALL_REM_MASK) )
        {
            rc = emR3ForcedActions(pVM, pVCpu, rc);
            VBOXVMM_EM_FF_ALL_RET(pVCpu, rc);
            if (   rc != VINF_SUCCESS
                && rc != VINF_EM_RESCHEDULE_REM)
            {
                *pfFFDone = true;
                break;
            }
        }

        // Every 8th iteration, check whether another execution mode can take over.
        if (!(++cLoops & 7))
        {
            EMSTATE enmCheck = emR3Reschedule(pVM, pVCpu);
            if (   enmCheck != EMSTATE_REM
                && enmCheck != EMSTATE_IEM_THEN_REM)
                return VINF_EM_RESCHEDULE;
        }
    } /* The Inner Loop, recompiled execution mode version. */

    return rc;
}
emR3ExecuteIemThenRem
先尝试执行IEM,如果出错了或者执行指令条数太多,转成REM
emR3ExecuteIemThenRem和emR3RemExecute都是调用IEMExecLots模拟执行指令,区别是emR3RemExecute可以一直执行,emR3ExecuteIemThenRem最多执行1024条指令,1024条指令之后切换成REM模式。
// Runs guest code in IEM until a budget of 1024 instructions (tracked in
// cIemThenRemInstructions) is used up or IEM reports an unimplemented
// aspect/instruction, then switches the VCPU to EMSTATE_REM.
//
// Returns VINF_SUCCESS after a state switch or when forced actions are pending;
// any other non-success IEM status is returned unchanged.
// pfFFDone is not touched by this function.
static VBOXSTRICTRC emR3ExecuteIemThenRem(PVM pVM, PVMCPU pVCpu, bool *pfFFDone)
{
    for (;;)
    {
        // Budget exhausted: fall out and switch to REM.
        if (!(pVCpu->em.s.cIemThenRemInstructions < 1024))
            break;

        // Execute at most what is left of the budget and account for it.
        uint32_t cExecuted;
        VBOXSTRICTRC rcStrict = IEMExecLots(pVCpu, 1024 - pVCpu->em.s.cIemThenRemInstructions /*cMaxInstructions*/,
                                            UINT32_MAX/2 /*cPollRate*/, &cExecuted);
        pVCpu->em.s.cIemThenRemInstructions += cExecuted;

        // IEM lacks an implementation for something: fall out and switch to REM.
        if (   rcStrict == VERR_IEM_ASPECT_NOT_IMPLEMENTED
            || rcStrict == VERR_IEM_INSTR_NOT_IMPLEMENTED)
            break;

        // Any other non-success status is the caller's business.
        if (rcStrict != VINF_SUCCESS)
            return rcStrict;

        // Can another execution mode take over now?
        EMSTATE enmNext = emR3Reschedule(pVM, pVCpu);
        if (enmNext != EMSTATE_REM && enmNext != EMSTATE_IEM_THEN_REM)
        {
            pVCpu->em.s.enmPrevState = pVCpu->em.s.enmState;
            pVCpu->em.s.enmState     = enmNext;
            return VINF_SUCCESS;
        }

        // Pending forced actions (UNHALT excluded): let the caller deal with them.
        if (   VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_REM_MASK)
            || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_ALL_REM_MASK & ~VMCPU_FF_UNHALT))
            return VINF_SUCCESS;
    }

    pVCpu->em.s.enmState = EMSTATE_REM;
    return VINF_SUCCESS;
}
emR3NemExecute
在后面NEM这一篇里介绍
IEMExecLots
在后面IEM这一篇里介绍
emR3Reschedule
决定当前VCPU是否执行GuestOS 代码和用什么方法执行GuestOS代码 (EMSTATE_HM/EMSTATE_NEM/EMSTATE_IEM_THEN_REM/EMSTATE_REM/EMSTATE_WAIT_SIPI)
// Decides which EM state the VCPU should use next, based on the VM configuration
// and the current guest CPU context.
// NOTE(review): `EFlags` and `uSS` are used but never declared in this excerpt;
// presumably they hold the guest EFLAGS and SS selector - confirm against the
// full source.
EMSTATE emR3Reschedule(PVM pVM, PVMCPU pVCpu)
{
// Still waiting for a SIPI: stay in EMSTATE_WAIT_SIPI.
if (pVCpu->em.s.enmState == EMSTATE_WAIT_SIPI)
return EMSTATE_WAIT_SIPI;
// Configured to execute everything in IEM.
if (pVM->em.s.fIemExecutesAll)
return EMSTATE_IEM;
// Raw-mode is not enabled: with HM enabled use HM if it can execute the guest;
// with HM not enabled use NEM if it can execute the guest; in every other
// case fall back to EMSTATE_IEM_THEN_REM.
if (!VM_IS_RAW_MODE_ENABLED(pVM))
{
if (VM_IS_HM_ENABLED(pVM))
{
if (HMCanExecuteGuest(pVM, pVCpu, &pVCpu->cpum.GstCtx))
return EMSTATE_HM;
}
else if (NEMR3CanExecuteGuest(pVM, pVCpu))
return EMSTATE_NEM;
return EMSTATE_IEM_THEN_REM;
}
// From here on raw-mode is enabled; each check below falls back to REM.
// Trap flag (single stepping) is set in EFLAGS.
if (EFlags.u32 & (X86_EFL_TF /* | HF_INHIBIT_IRQ_MASK*/))
{
return EMSTATE_REM;
}
// The VM flag (virtual-8086 mode) is set in EFLAGS.
if (EFlags.u32 & X86_EFL_VM) {
return EMSTATE_REM;
}
// CR0.PG and CR0.PE are not both set (paged protected mode is not active).
uint32_t u32CR0 = pVCpu->cpum.GstCtx.cr0;
if ((u32CR0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
{
return EMSTATE_REM;
}
// CR4.PAE is set although the guest CPUID (leaf 1, EDX) does not report PAE.
if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_PAE)
{
uint32_t u32Dummy, u32Features;
CPUMGetGuestCpuId(pVCpu, 1, 0, &u32Dummy, &u32Dummy, &u32Dummy, &u32Features);
if (!(u32Features & X86_CPUID_FEATURE_EDX_PAE))
return EMSTATE_REM;
}
if ( pVCpu->cpum.GstCtx.eflags.Bits.u1VM // virtual-8086 mode
|| (uSS & X86_SEL_RPL) == 3) // ring-3
{
// Interrupts are disabled (EFLAGS.IF clear).
if (!(EFlags.u32 & X86_EFL_IF))
{
return EMSTATE_REM;
}
// CR0.WP is clear; presumably refused because the guest could otherwise
// write to hypervisor code and data - confirm.
if (!(u32CR0 & X86_CR0_WP))
{
return EMSTATE_REM;
}
}
else
{
// Neither V86 nor ring-3: only ring-0 (RPL 0) is accepted here.
if ((uSS & X86_SEL_RPL) != 0)
{
Log2(("raw r0 mode refused: CPL %d\n", uSS & X86_SEL_RPL));
return EMSTATE_REM;
}
// SS or CS is not a 32-bit (default-big) segment.
if ( !(pVCpu->cpum.GstCtx.ss.Attr.n.u1DefBig)
|| !(pVCpu->cpum.GstCtx.cs.Attr.n.u1DefBig))
{
Log2(("raw r0 mode refused: SS/CS not 32bit\n"));
return EMSTATE_REM;
}
// CR0.WP is clear; presumably refused because the guest could otherwise
// write to hypervisor code and data - confirm.
if (!(u32CR0 & X86_CR0_WP))
{
Log2(("raw r0 mode refused: CR0.WP=0!\n"));
return EMSTATE_REM;
}
}
// CS is flagged CPUMSELREG_FLAGS_STALE.
if (pVCpu->cpum.GstCtx.cs.fFlags & CPUMSELREG_FLAGS_STALE)
{
return EMSTATE_REM;
}
// The other segment registers are checked the same way (elided).
...
// No objection found: use raw-mode.
return EMSTATE_RAW;
}
ForcedActions系列函数
emR3HighPriorityPostForcedActions
在执行完GuestOS代码之后(返回R3后),执行高优先级的post操作
// Processes the high-priority "post" forced actions.
//
// Returns the updated status code, or the failure status of PGMUpdateCR3().
// pVM   - the VM.
// pVCpu - the virtual CPU.
// rc    - the status code produced by the preceding guest execution; it is
//         passed through the IEM / IOM handlers and may be replaced by them.
VBOXSTRICTRC emR3HighPriorityPostForcedActions(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc)
{
    // A CR3 update is pending: let the page manager pick up the guest CR3.
    if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
    {
        int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
        if (RT_FAILURE(rc2))
            return rc2;
    }

    // A PAE PDPE update is pending: forward the PDPEs to PGM when the guest is
    // in PAE mode, otherwise just drop the flag.
    if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES))
    {
        if (CPUMIsGuestInPAEMode(pVCpu))
        {
            PX86PDPE pPdpes = HMGetPaePdpes(pVCpu);
            PGMGstUpdatePaePdpes(pVCpu, pPdpes);
        }
        else
            VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES);
    }

    // IEM has pending work.
    if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_IEM))
        rc = IEMR3ProcessForceFlag(pVM, pVCpu, rc);

    // IOM has pending work.
    if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_IOM))
    {
        rc = IOMR3ProcessForceFlag(pVM, pVCpu, rc);
        // Only when idxContinueExitRec is a valid exit-record index: on success
        // ask to resume history execution, otherwise invalidate the index.
        if (pVCpu->em.s.idxContinueExitRec < RT_ELEMENTS(pVCpu->em.s.aExitRecords))
        {
            if (rc == VINF_SUCCESS)
                rc = VINF_EM_RESUME_R3_HISTORY_EXEC;
            else
                pVCpu->em.s.idxContinueExitRec = UINT16_MAX;
        }
    }

    // Out of memory: any EM status in (VINF_EM_NO_MEMORY, VINF_EM_LAST] is
    // replaced by VINF_EM_NO_MEMORY.
    if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
    {
        if (   rc > VINF_EM_NO_MEMORY
            && rc <= VINF_EM_LAST)
            rc = VINF_EM_NO_MEMORY;
    }

    // The original excerpt fell off the end without returning the status.
    return rc;
}
emR3ForcedActions:
执行所有pending的action
VMExit的handle里如果有当时无法处理,需要返回给R3处理,但需要在下一次进入GuestOS代码之前完成的事件,可以调用VMCPU_FF_SET 宏设置pending action事件,emR3ForcedActions里会一一处理这些pending事件
// Processes all pending forced actions (force-flags) for the VM and this VCPU, in
// four groups: normal-priority post, normal-priority, high-priority pre, and
// memory / virtual-sync handling.
// NOTE(review): in this excerpt `rc2` is never declared, several checks test `rc`
// right after `rc2` was assigned, and the function has no final return;
// presumably the lines merging rc2 into rc (cf. the single UPDATE_RC() call that
// is left) and the return were elided - confirm against the full source.
int emR3ForcedActions(PVM pVM, PVMCPU pVCpu, int rc)
{
// Normal-priority actions that run after guest code has executed.
if ( VM_FF_IS_ANY_SET(pVM, VM_FF_NORMAL_PRIORITY_POST_MASK)
|| (VMCPU_FF_NORMAL_PRIORITY_POST_MASK && VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_NORMAL_PRIORITY_POST_MASK)) )
{
// EMT rendezvous requested: join it.
if (VM_FF_IS_SET(pVM, VM_FF_EMT_RENDEZVOUS))
{
rc2 = VMMR3EmtRendezvousFF(pVM, pVCpu);
// Suspend / reset / power-off: return right away.
if (RT_UNLIKELY(rc == VINF_EM_SUSPEND || rc == VINF_EM_RESET || rc == VINF_EM_OFF))
{
return rc;
}
}
// The VM state needs checking.
if (VM_FF_IS_SET(pVM, VM_FF_CHECK_VM_STATE))
{
// Error and guru-meditation states suspend; a VM being destroyed terminates.
VMSTATE enmState = VMR3GetState(pVM);
switch (enmState)
{
case VMSTATE_FATAL_ERROR:
case VMSTATE_FATAL_ERROR_LS:
case VMSTATE_GURU_MEDITATION:
case VMSTATE_GURU_MEDITATION_LS:
return VINF_EM_SUSPEND;
case VMSTATE_DESTROYING:
return VINF_EM_TERMINATE;
}
}
// Debugger forced action.
if ( VM_FF_IS_SET(pVM, VM_FF_DBGF)
|| VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_DBGF) )
{
rc2 = DBGFR3VMMForcedAction(pVM, pVCpu);
}
// A VM reset was requested.
if (VM_FF_TEST_AND_CLEAR(pVM, VM_FF_RESET))
{
rc2 = VBOXSTRICTRC_TODO(VMR3ResetFF(pVM));
}
// Out of memory.
if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
{
// Try to allocate handy pages; on VINF_EM_NO_MEMORY return it to the caller.
rc2 = PGMR3PhysAllocateHandyPages(pVM);
if (rc == VINF_EM_NO_MEMORY)
return rc;
}
}
// Normal-priority actions; VM_FF_PGM_NO_MEMORY is passed as the exception flag.
if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_NORMAL_PRIORITY_MASK, VM_FF_PGM_NO_MEMORY))
{
// PDM queues: flush them.
if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_PDM_QUEUES, VM_FF_PGM_NO_MEMORY))
PDMR3QueueFlushAll(pVM);
// PDM DMA: run it.
if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_PDM_DMA, VM_FF_PGM_NO_MEMORY))
PDMR3DmaRun(pVM);
// Check for an EMT rendezvous once more.
if (VM_FF_IS_SET(pVM, VM_FF_EMT_RENDEZVOUS))
{
rc2 = VMMR3EmtRendezvousFF(pVM, pVCpu);
if (RT_UNLIKELY(rc == VINF_EM_SUSPEND || rc == VINF_EM_RESET || rc == VINF_EM_OFF))
{
return rc;
}
}
// Pending requests: process them.
if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_REQUEST, VM_FF_PGM_NO_MEMORY))
{
rc2 = VMR3ReqProcessU(pVM->pUVM, VMCPUID_ANY, false /*fPriorityOnly*/);
if (rc2 == VINF_EM_OFF || rc2 == VINF_EM_TERMINATE) /** @todo this shouldn't be necessary */
{
return rc2;
}
if (RT_UNLIKELY(rc == VINF_EM_SUSPEND || rc == VINF_EM_RESET || rc == VINF_EM_OFF))
{
return rc;
}
}
}
// High-priority actions that must run before guest code.
if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_PRE_MASK)
|| VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_PRE_MASK))
{
// Timers: run the timer queues (skipped while out of memory).
if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TIMER)
&& !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
TMR3TimerQueuesDo(pVM);
// Pending APIC interrupt update.
if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
APICUpdatePendingInterrupts(pVCpu);
// Interrupt inhibition: drop the flag once RIP has moved away from the
// recorded inhibit-interrupts PC.
if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
&& !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
{
if (CPUMGetGuestRIP(pVCpu) != EMGetInhibitInterruptsPC(pVCpu))
{
VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
}
}
// Nested VMX.
#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_VMX_APIC_WRITE | VMCPU_FF_VMX_MTF | VMCPU_FF_VMX_PREEMPT_TIMER))
{
// Pending APIC-write: let IEM emulate the APIC-write VM-exit.
if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_VMX_APIC_WRITE))
{
rc2 = VBOXSTRICTRC_VAL(IEMExecVmxVmexitApicWrite(pVCpu));
if (rc2 != VINF_VMX_INTERCEPT_NOT_ACTIVE)
UPDATE_RC();
}
// Pending MTF (presumably Monitor Trap Flag): let IEM emulate the
// VMX_EXIT_MTF VM-exit.
if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_VMX_MTF))
{
rc2 = VBOXSTRICTRC_VAL(IEMExecVmxVmexit(pVCpu, VMX_EXIT_MTF, 0 /* uExitQual */));
}
// Pending preemption timer: let IEM emulate the preempt-timer VM-exit.
if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_VMX_PREEMPT_TIMER))
{
rc2 = VBOXSTRICTRC_VAL(IEMExecVmxVmexitPreemptTimer(pVCpu));
}
}
#endif
// Event delivery to the guest. Skipped while out of memory, when rc is
// non-zero and below VINF_EM_RESCHEDULE_HM, during interrupt inhibition,
// or when TRPM already holds a trap.
bool fWakeupPending = false;
if ( !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)
&& (!rc || rc >= VINF_EM_RESCHEDULE_HM)
&& !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS) /* Interrupt shadows block both NMIs and interrupts. */
&& !TRPMHasTrap(pVCpu)) /* An event could already be scheduled for dispatching. */
{
bool fInVmxNonRootMode;
bool fInSvmHwvirtMode;
bool const fInNestedGuest = CPUMIsGuestInNestedHwvirtMode(&pVCpu->cpum.GstCtx);
if (fInNestedGuest)
{
fInVmxNonRootMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
fInSvmHwvirtMode = CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx);
}
else
{
fInVmxNonRootMode = false;
fInSvmHwvirtMode = false;
}
// Nothing is delivered unless the guest's GIF is set.
bool fGif = CPUMGetGuestGif(&pVCpu->cpum.GstCtx);
if (fGif)
{
#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
// NMI-window VM-exit pending and virtual NMIs are not blocked.
if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_VMX_NMI_WINDOW)
&& !CPUMIsGuestVmxVirtNmiBlocking(&pVCpu->cpum.GstCtx))
{
Assert(CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_NMI_WINDOW_EXIT));
Assert(CPUMIsGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx));
rc2 = VBOXSTRICTRC_VAL(IEMExecVmxVmexit(pVCpu, VMX_EXIT_NMI_WINDOW, 0 /* uExitQual */));
}
else
#endif
// NMI pending and NMIs are not blocked.
if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI)
&& !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
{
// Nested VMX / SVM with the NMI intercepted: emulate the VM-exit in IEM.
#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
if ( fInVmxNonRootMode
&& CPUMIsGuestVmxPinCtlsSet(&pVCpu->cpum.GstCtx, VMX_PIN_CTLS_NMI_EXIT))
{
rc2 = VBOXSTRICTRC_VAL(IEMExecVmxVmexitXcptNmi(pVCpu));
}
else
#endif
#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
if ( fInSvmHwvirtMode
&& CPUMIsGuestSvmCtrlInterceptSet(pVCpu, &pVCpu->cpum.GstCtx, SVM_CTRL_INTERCEPT_NMI))
{
rc2 = VBOXSTRICTRC_VAL(IEMExecSvmVmexit(pVCpu, SVM_EXIT_NMI, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */));
}
else
#endif
{
// Not intercepted: assert the NMI as a trap via TRPM.
rc2 = TRPMAssertTrap(pVCpu, X86_XCPT_NMI, TRPM_TRAP);
// On success clear the NMI flag and pick the reschedule status for the
// execution mode in use.
if (rc2 == VINF_SUCCESS)
{
VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
fWakeupPending = true;
if (pVM->em.s.fIemExecutesAll)
rc2 = VINF_EM_RESCHEDULE;
else
{
rc2 = HMR3IsActive(pVCpu) ? VINF_EM_RESCHEDULE_HM
: VM_IS_NEM_ENABLED(pVM) ? VINF_EM_RESCHEDULE
: VINF_EM_RESCHEDULE_REM;
}
}
}
}
#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
// Nested VMX: interrupt-window VM-exit pending and virtual interrupts are
// enabled; let IEM emulate the VM-exit.
else if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_VMX_INT_WINDOW)
&& CPUMIsGuestVmxVirtIntrEnabled(&pVCpu->cpum.GstCtx))
{
Assert(CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_INT_WINDOW_EXIT));
Assert(CPUMIsGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx));
rc2 = VBOXSTRICTRC_VAL(IEMExecVmxVmexit(pVCpu, VMX_EXIT_INT_WINDOW, 0 /* uExitQual */));
}
#endif
// External interrupts.
else
{
// VMX non-root mode with a nested-guest interrupt pending and virtual
// interrupts enabled: nothing is injected here.
if ( fInVmxNonRootMode
&& VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST)
&& CPUMIsGuestVmxVirtIntrEnabled(&pVCpu->cpum.GstCtx))
{
rc2 = VINF_SUCCESS;
}
// APIC or PIC interrupt pending and physical interrupts are enabled.
else if ( VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
&& CPUMIsGuestPhysIntrEnabled(pVCpu))
{
Assert(pVCpu->em.s.enmState != EMSTATE_WAIT_SIPI);
// Give a nested hypervisor's intercept the first chance;
// VINF_NO_CHANGE means the interrupt was not intercepted.
if (fInVmxNonRootMode)
rc2 = emR3VmxNstGstIntrIntercept(pVCpu);
else if (fInSvmHwvirtMode)
rc2 = emR3SvmNstGstIntrIntercept(pVCpu);
else
rc2 = VINF_NO_CHANGE;
if (rc2 == VINF_NO_CHANGE)
{
bool fInjected = false;
CPUM_IMPORT_EXTRN_RET(pVCpu, IEM_CPUMCTX_EXTRN_XCPT_MASK);
// Inject the hardware interrupt through TRPM.
rc2 = TRPMR3InjectEvent(pVM, pVCpu, TRPM_HARDWARE_INT, &fInjected);
fWakeupPending = true;
// With IEM executing everything, any mode-specific reschedule
// status becomes the generic VINF_EM_RESCHEDULE.
if ( pVM->em.s.fIemExecutesAll
&& ( rc2 == VINF_EM_RESCHEDULE_REM
|| rc2 == VINF_EM_RESCHEDULE_HM
|| rc2 == VINF_EM_RESCHEDULE_RAW))
{
rc2 = VINF_EM_RESCHEDULE;
}
}
}
// Nested SVM: virtual interrupt pending for the nested guest and virtual
// interrupts are enabled.
else if ( fInSvmHwvirtMode
&& VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST)
&& CPUMIsGuestSvmVirtIntrEnabled(pVCpu, &pVCpu->cpum.GstCtx))
{
rc2 = emR3SvmNstGstVirtIntrIntercept(pVCpu);
// Not intercepted: assert the virtual interrupt vector via TRPM.
if (rc2 == VINF_NO_CHANGE)
{
VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST);
uint8_t const uNstGstVector = CPUMGetGuestSvmVirtIntrVector(&pVCpu->cpum.GstCtx);
TRPMAssertTrap(pVCpu, uNstGstVector, TRPM_HARDWARE_INT);
rc2 = VINF_EM_RESCHEDULE;
}
}
}
}
}
// Handy pages are needed; VM_FF_PGM_NO_MEMORY is passed as the exception flag.
if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_PGM_NEED_HANDY_PAGES, VM_FF_PGM_NO_MEMORY))
{
rc2 = PGMR3PhysAllocateHandyPages(pVM);
}
// Out of memory: retry the allocation; on VINF_EM_NO_MEMORY return it to the caller.
if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
{
rc2 = PGMR3PhysAllocateHandyPages(pVM);
if (rc == VINF_EM_NO_MEMORY)
return rc;
}
// Virtual sync clock forced action.
if (VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC))
TMR3VirtualSyncFF(pVM, pVCpu);
}
emR3HmHandleRC
处理HM模式下执行GuestOS代码后的返回状态(由emR3HmExecute调用)
/*
VINF_SUCCESS
其他值: 需要跳出ExecuteVM循环,返回上一层函数
*/
// Handles the status code returned from HM guest execution: either turns it into
// VINF_SUCCESS, or emulates the operation that caused the exit and uses that
// result as the new status.
// NOTE(review): this excerpt ends with the closing brace of the switch; the
// function's return statement and closing brace are not shown.
int emR3HmHandleRC(PVM pVM, PVMCPU pVCpu, int rc)
{
switch (rc)
{
case VINF_SUCCESS:
break;
// These need no handling here and are turned into success.
case VINF_EM_RESCHEDULE_RAW:
case VINF_EM_RESCHEDULE_HM:
case VINF_EM_RAW_INTERRUPT:
case VINF_EM_RAW_TO_R3:
case VINF_EM_RAW_TIMER_PENDING:
case VINF_EM_PENDING_REQUEST:
rc = VINF_SUCCESS;
break;
// CR3 sync requested: the assertion checks that a matching force-flag is set,
// then the status becomes success (presumably the sync itself happens in the
// forced-action processing - confirm).
case VINF_PGM_SYNC_CR3:
AssertMsg(VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL),
("VINF_PGM_SYNC_CR3 and no VMCPU_FF_PGM_SYNC_CR3*!\n"));
rc = VINF_SUCCESS;
break;
case VINF_PGM_POOL_FLUSH_PENDING:
rc = VINF_SUCCESS;
break;
// Paging mode change: let PGM switch modes from CR0 / CR4 / EFER, then
// reschedule on success.
case VINF_PGM_CHANGE_MODE:
rc = PGMChangeMode(pVCpu, pVCpu->cpum.GstCtx.cr0, pVCpu->cpum.GstCtx.cr4, pVCpu->cpum.GstCtx.msrEFER);
if (rc == VINF_SUCCESS)
rc = VINF_EM_RESCHEDULE;
break;
// Ring-3 I/O port read/write: execute the I/O instruction.
case VINF_IOM_R3_IOPORT_READ:
case VINF_IOM_R3_IOPORT_WRITE:
case VINF_EM_RESUME_R3_HISTORY_EXEC: /* Resume EMHistoryExec after VMCPU_FF_IOM. */
rc = emR3ExecuteIOInstruction(pVM, pVCpu);
break;
// Pending ring-3 I/O port write / read.
case VINF_EM_PENDING_R3_IOPORT_WRITE:
rc = VBOXSTRICTRC_TODO(emR3ExecutePendingIoPortWrite(pVM, pVCpu));
break;
case VINF_EM_PENDING_R3_IOPORT_READ:
rc = VBOXSTRICTRC_TODO(emR3ExecutePendingIoPortRead(pVM, pVCpu));
break;
// Ring-3 MMIO access: execute the instruction.
case VINF_IOM_R3_MMIO_READ:
case VINF_IOM_R3_MMIO_WRITE:
case VINF_IOM_R3_MMIO_READ_WRITE:
rc = emR3ExecuteInstruction(pVM, pVCpu, "MMIO");
break;
// Ring-3 MSR read / write.
case VINF_CPUM_R3_MSR_READ:
case VINF_CPUM_R3_MSR_WRITE:
rc = emR3ExecuteInstruction(pVM, pVCpu, "MSR");
break;
// Ring-3 hypercall.
case VINF_GIM_R3_HYPERCALL:
rc = emR3ExecuteInstruction(pVM, pVCpu, "Hypercall");
break;
// Pending I/O port or MMIO write commit: processed by IOM.
case VINF_IOM_R3_IOPORT_COMMIT_WRITE:
case VINF_IOM_R3_MMIO_COMMIT_WRITE:
rc = VBOXSTRICTRC_TODO(IOMR3ProcessForceFlag(pVM, pVCpu, rc));
break;
// Remaining status codes are left unchanged for the caller (list elided).
case VINF_EM_TERMINATE:
case VINF_EM_OFF:
case VINF_EM_RESET:
....
break;
}
来源:CSDN
作者:yangrong的blog
链接:https://blog.csdn.net/qq_29684547/article/details/104125760