vmm: Allow VMMs to change vmexit conditions (XCC) The 2LS (or any HR3 app) can change certain vmexit conditions. For VMX, these are the VMX controls. For AMD, we'll have to do something similar. Right now, you can control exit on halt (default yes) and exit on pause (default no). The greedy mode scheduler will turn off halt exiting, so that when the guest wants to halt, the core will actually halt. This will cut down on the interference with hyperthreads/caches. 2LSs can actually change this on the fly, subject to the number of host cores available. Ideally, we'd allow mwait too, but we need to sort out letting the guest mwait for power management, but not use it for monitor-mwait. As is, they actually could monitor-mwait, but once we tell them that mwait (and implied monitor) is available, we can't renege. That means we wouldn't be able to change the exiting status on the fly without the guest potentially sleeping forever. Reinstall your kernel headers. Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
diff --git a/kern/arch/x86/vmm/intel/vmx.c b/kern/arch/x86/vmm/intel/vmx.c index 863fec7..57b64b0 100644 --- a/kern/arch/x86/vmm/intel/vmx.c +++ b/kern/arch/x86/vmm/intel/vmx.c
@@ -606,7 +606,6 @@ VM_EXIT_HOST_ADDR_SPACE_SIZE), /* 64 bit */ .must_be_0 = (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | - // VM_EXIT_ACK_INTR_ON_EXIT | VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER), @@ -1434,3 +1433,39 @@ { return gpc->proc->env_pgdir.eptp; } + +int vmx_ctl_get_exits(struct vmx_vmm *vmx) +{ + int ret = 0; + + if (vmx->cpu_exec_ctls & CPU_BASED_HLT_EXITING) + ret |= VMM_CTL_EXIT_HALT; + if (vmx->cpu_exec_ctls & CPU_BASED_PAUSE_EXITING) + ret |= VMM_CTL_EXIT_PAUSE; + return ret; +} + +int vmx_ctl_set_exits(struct vmx_vmm *vmx, int vmm_exits) +{ + int toggle_want; + int vmx_toggle_do = 0; + + toggle_want = (vmx_ctl_get_exits(vmx) ^ vmm_exits) & VMM_CTL_ALL_EXITS; + if (toggle_want & VMM_CTL_EXIT_HALT) { + if (!vmx_control_can_be_changed(&cbec, CPU_BASED_HLT_EXITING)) { + set_error(ENOSYS, "VMX can't toggle EXIT_HALT"); + return -1; + } + vmx_toggle_do |= CPU_BASED_HLT_EXITING; + } + if (toggle_want & VMM_CTL_EXIT_PAUSE) { + if (!vmx_control_can_be_changed(&cbec, CPU_BASED_PAUSE_EXITING)) { + set_error(ENOSYS, "VMX can't toggle EXIT_PAUSE"); + return -1; + } + vmx_toggle_do |= CPU_BASED_PAUSE_EXITING; + } + /* This is being read concurrently by load_guest_pcore. */ + WRITE_ONCE(vmx->cpu_exec_ctls, vmx->cpu_exec_ctls ^ vmx_toggle_do); + return 0; +}
diff --git a/kern/arch/x86/vmm/intel/vmx.h b/kern/arch/x86/vmm/intel/vmx.h index 2b448f7..b851933 100644 --- a/kern/arch/x86/vmm/intel/vmx.h +++ b/kern/arch/x86/vmm/intel/vmx.h
@@ -403,3 +403,5 @@ uint64_t gpc_get_eptp(struct guest_pcore *gpc); void vmx_clear_vmcs(void); void vmx_setup_vmx_vmm(struct vmx_vmm *vmx); +int vmx_ctl_get_exits(struct vmx_vmm *vmx); +int vmx_ctl_set_exits(struct vmx_vmm *vmx, int vmm_exits);
diff --git a/kern/include/ros/bits/syscall.h b/kern/include/ros/bits/syscall.h index 23384c5..f92bc52 100644 --- a/kern/include/ros/bits/syscall.h +++ b/kern/include/ros/bits/syscall.h
@@ -42,6 +42,7 @@ #define SYS_pop_ctx 37 #define SYS_vmm_poke_guest 38 #define SYS_send_event 39 +#define SYS_vmm_ctl 40 /* FS Syscalls */ #define SYS_read 100
diff --git a/kern/include/ros/vmm.h b/kern/include/ros/vmm.h index bf7bb8c..ef70f49 100644 --- a/kern/include/ros/vmm.h +++ b/kern/include/ros/vmm.h
@@ -15,3 +15,9 @@ #define VMCALL_SMPBOOT 0x2 #define VMM_ALL_FLAGS (VMM_VMCALL_PRINTF) + +#define VMM_CTL_GET_EXITS 1 +#define VMM_CTL_SET_EXITS 2 +#define VMM_CTL_EXIT_HALT (1 << 0) +#define VMM_CTL_EXIT_PAUSE (1 << 1) +#define VMM_CTL_ALL_EXITS ((1 << 2) - 1)
diff --git a/kern/src/syscall.c b/kern/src/syscall.c index 32cfae8..39425af 100644 --- a/kern/src/syscall.c +++ b/kern/src/syscall.c
@@ -1512,6 +1512,48 @@ return vmm_poke_guest(p, guest_pcoreid); } +static int no_amd(void) +{ + set_error(ENOTSUP, "AMD VMMs unsupported"); + return -1; +} + +static int sys_vmm_ctl(struct proc *p, int cmd, unsigned long arg1, + unsigned long arg2, unsigned long arg3, + unsigned long arg4) +{ + int ret; + + /* Protects against concurrent setters and for gets that are not atomic + * reads (say, multiple exec ctls). */ + qlock(&p->vmm.qlock); + switch (cmd) { + case VMM_CTL_GET_EXITS: + if (p->vmm.amd) + ret = no_amd(); + else + ret = vmx_ctl_get_exits(&p->vmm.vmx); + break; + case VMM_CTL_SET_EXITS: + if (arg1 & ~VMM_CTL_ALL_EXITS) { + set_error(EINVAL, "Bad vmm_ctl_exits %lx (%x)", arg1, + VMM_CTL_ALL_EXITS); + ret = -1; + break; + } + if (p->vmm.amd) + ret = no_amd(); + else + ret = vmx_ctl_set_exits(&p->vmm.vmx, arg1); + break; + default: + set_error(EINVAL, "Bad vmm_ctl cmd %d", cmd); + ret = -1; + } + qunlock(&p->vmm.qlock); + return ret; +} + /* Pokes the ksched for the given resource for target_pid. If the target pid * == 0, we just poke for the calling process. The common case is poking for * self, so we avoid the lookup. @@ -2589,6 +2631,7 @@ [SYS_change_to_m] = {(syscall_t)sys_change_to_m, "change_to_m"}, [SYS_vmm_setup] = {(syscall_t)sys_vmm_setup, "vmm_setup"}, [SYS_vmm_poke_guest] = {(syscall_t)sys_vmm_poke_guest, "vmm_poke_guest"}, + [SYS_vmm_ctl] = {(syscall_t)sys_vmm_ctl, "vmm_ctl"}, [SYS_poke_ksched] = {(syscall_t)sys_poke_ksched, "poke_ksched"}, [SYS_abort_sysc] = {(syscall_t)sys_abort_sysc, "abort_sysc"}, [SYS_abort_sysc_fd] = {(syscall_t)sys_abort_sysc_fd, "abort_sysc_fd"},
diff --git a/tests/strace.c b/tests/strace.c index 3ff8fc8..407f572 100644 --- a/tests/strace.c +++ b/tests/strace.c
@@ -222,6 +222,7 @@ static struct trace_set vmm_trace_set = { "vmm", {SYS_vmm_setup, SYS_vmm_poke_guest, + SYS_vmm_ctl, SYS_pop_ctx, 0} };
diff --git a/user/vmm/sched.c b/user/vmm/sched.c index 288b8d7..1cc5b19 100644 --- a/user/vmm/sched.c +++ b/user/vmm/sched.c
@@ -592,6 +592,8 @@ greedy_rnbl_guests = calloc(vm->nr_gpcs, sizeof(struct vmm_thread *)); assert(greedy_rnbl_guests); vcore_request_total(sched_nr_greedy_cores()); + syscall(SYS_vmm_ctl, VMM_CTL_SET_EXITS, + syscall(SYS_vmm_ctl, VMM_CTL_GET_EXITS) & ~VMM_CTL_EXIT_HALT); } return 0; }