/*
 *  hxen_ops.c
 *  hxen
 *
 *  Copyright 2009 Citrix Systems, Inc. All rights reserved.
 *
 */

#include "hxen.h"
#include <libkern/libkern.h>
#include <sys/conf.h>
#include <kern/thread_call.h>
// #include <kern/timer_call.h>
#include "hxen_call.h"
#include <hxen_ioctl.h>
#include <sys/proc.h>
#include <sys/time.h>

#include <asm/bitops.h>
#include <asm/system.h>

#define KXEN_DEFINE_SYMBOLS_PROTO
#include <hxen_link.h>
KXEN_PROTOTYPES(extern)

/*
 * Base size in bytes (16 MiB) of the xen heap; hxen_init() adds the m2p
 * table and heap bitmap on top of this before rounding up to pages.
 */
#define KXEN_HEAP_SIZE (16 * 1024 * 1024)

/*
 * Buffers obtained in hxen_init() and released by hxen_init_free_allocs().
 * xenheap_pages is the result of kernel_get_mfn_list() on xenheap_va.
 */
static uint32_t *xenheap_pages = NULL;
static void *vmaptable = NULL;
static unsigned int vmaptable_size;
static void *frametable = NULL;
static unsigned int frametable_size;
static unsigned char *xenheap_va = NULL;

/*
 * Per-host-CPU state, indexed by host cpu number.
 * hxen_cpu_ipi_raised_vectors holds one bit per vector in 0xf0..0xff
 * (bit index == vector - 0xf0).
 */
static thread_t hxen_cpu_ipi_thread[MAXIMUM_PROCESSORS];
static semaphore_t hxen_cpu_ipi_sem[MAXIMUM_PROCESSORS];
static uint32_t hxen_cpu_ipi_raised_vectors[MAXIMUM_PROCESSORS];
static thread_t hxen_idle_thread[MAXIMUM_PROCESSORS];
static semaphore_t hxen_idle_sem[MAXIMUM_PROCESSORS];
/* Last deadline (mach absolute time) recorded by hxen_set_cpu_timer(). */
uint64_t hxen_cpu_timer_deadline[MAXIMUM_PROCESSORS];

/*
 * Importance handed to hxen_create_thread() for IPI threads; values above
 * 99 additionally select the non-timeshare / time-constraint policies there.
 */
#define KXEN_IPI_THREAD_PRIO 100 /* 31 */

/* Set at the end of hxen_init(); hxen_execute() delays until this time. */
static uint64_t hxen_ready_time;
/* Becomes TRUE once the first hxen_execute() call finished its setup. */
static boolean_t hxen_is_ready = false;

/* Uncomment to run the VM from a dedicated execute thread instead. */
//#define KXEN_EXECUTE_THREAD
#if defined(KXEN_EXECUTE_THREAD)
static thread_t hxen_execute_thread;
static int hxen_execute_return = 0;
#endif
static void hxen_execute_abort(void);

/*
 * Dispatch every IPI vector currently raised for host_cpu.
 *
 * The pending mask is swapped with 0 via xchg(), then each set bit is
 * handed to hxen_dispatch_ipi() as vector (bit + 0xf0), lowest bit first.
 * The dispatch runs with an exception registration record hooked.
 */
static void
hxen_cpu_ipi_cb(unsigned int host_cpu)
{
    DECLARE_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    unsigned int pending;

    HOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec, NULL);

    /* "pending &= pending - 1" drops the lowest set bit just handled. */
    for (pending = xchg(&hxen_cpu_ipi_raised_vectors[host_cpu], 0);
	 pending != 0;
	 pending &= pending - 1) {
	unsigned int bit = ffs(pending) - 1;

	hxen_dispatch_ipi(bit + 0xf0);
    }

    UNHOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
}

/*
 * Dispatch every IPI vector currently raised for the vcpu described by vi.
 *
 * vi->vi_raised_vectors is swapped with 0 via xchg(); each set bit is then
 * passed to hxen_dispatch_ivi() as vector (bit + 0xf0), lowest bit first.
 * The exception registration record is hooked with the vcpu as context.
 */
static void
hxen_vcpu_ipi_cb(struct vm_info *vi)
{
    DECLARE_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    unsigned int pending;

    HOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec, vi->vi_shared.vcpu);

    /* "pending &= pending - 1" drops the lowest set bit just handled. */
    for (pending = xchg(&vi->vi_raised_vectors, 0);
	 pending != 0;
	 pending &= pending - 1) {
	unsigned int bit = ffs(pending) - 1;

	hxen_dispatch_ivi(&vi->vi_shared, bit + 0xf0);
    }

    UNHOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
}

static void
hxen_cpu_ipi_thread_fn(void *context)
{
    unsigned int host_cpu = (unsigned int)(uintptr_t)context;
    kern_return_t kret;

    hxen_cpu_pin(host_cpu);
    dprintk("hxen_cpu_ipi_thread_fn %d on cpu %d\n", host_cpu,
	    xnu_get_cpu_number());

    while (hxen_shutting_down == FALSE) {
	kret = KERN_ABORTED;
	while (kret == KERN_ABORTED) {
	    kret = semaphore_wait(hxen_cpu_ipi_sem[host_cpu]);
	    if (kret != KERN_SUCCESS)
		dprintk("hxen_cpu_ipi_thread_fn %d semaphore_wait: %d\n",
			host_cpu, kret);
	}
	if (kret != KERN_SUCCESS)
	    break;
	while (host_cpu != xnu_get_cpu_number()) {
	    dprintk("hxen_cpu_ipi_thread_fn %d ipi on cpu %d != %d\n",
		    host_cpu, xnu_get_cpu_number(), host_cpu);
	    hxen_cpu_pin(host_cpu);
	}
	if (hxen_info && hxen_info->ki_running) {
	    hxen_cpu_ipi_cb(host_cpu);
	    hxen_cpu_timer_call_check(host_cpu);
	}
    }

    dprintk("hxen_cpu_ipi_thread_fn %d on cpu %d ==> exit\n", host_cpu,
	    xnu_get_cpu_number());
}

/*
 * Body of the per-vcpu IPI thread; context is the struct vm_info of the VM.
 *
 * Until hxen_shutting_down is set, the thread blocks on vi->vi_ipi_sem
 * (retrying while the wait is aborted), makes sure it is running on
 * vi->vi_host_cpu, and then - if the hypervisor is running - dispatches the
 * vectors raised for the vcpu.  Any wait result other than success or abort
 * ends the thread.
 */
static void
hxen_vcpu_ipi_thread_fn(void *context)
{
    struct vm_info *vi = (struct vm_info *)context;
    kern_return_t kret;

    hxen_cpu_pin(vi->vi_host_cpu);
    dprintk("hxen_vcpu_ipi_thread_fn %p on cpu %d\n", vi,
	    xnu_get_cpu_number());

    while (hxen_shutting_down == FALSE) {
	/* Sleep until kicked; an aborted wait is simply retried. */
	kret = KERN_ABORTED;
	while (kret == KERN_ABORTED) {
	    kret = semaphore_wait(vi->vi_ipi_sem);
	    if (kret != KERN_SUCCESS)
		dprintk("hxen_vcpu_ipi_thread_fn %p semaphore_wait: %d\n",
			vi, kret);
	}
	if (kret != KERN_SUCCESS)
	    break;
	/* Re-pin until we really execute on the vcpu's host cpu. */
	while (vi->vi_host_cpu != xnu_get_cpu_number()) {
	    dprintk("hxen_vcpu_ipi_thread_fn %p vcpu ipi on cpu %d != %d\n",
		    vi, xnu_get_cpu_number(), vi->vi_host_cpu);
	    hxen_cpu_pin(vi->vi_host_cpu);
	}
	if (hxen_info && hxen_info->ki_running) {
	    hxen_vcpu_ipi_cb(vi);
	}
    }

    /* Log under this function's own name (was mislabelled as the cpu thread). */
    dprintk("hxen_vcpu_ipi_thread_fn %p on cpu %d ==> exit\n", vi,
	    xnu_get_cpu_number());
}

/*
 * Raise IPI `vector` on host_cpu (installed as hxen_info->ki_kick_cpu).
 *
 * Requests for a cpu outside the per-CPU tables or a vector outside
 * 0xf0..0xff are ignored.  The vector's bit is set in the cpu's raised
 * mask; the IPI thread is only signalled when that bit was not already set.
 */
static void __cdecl
hxen_cpu_ipi(unsigned int host_cpu, unsigned int vector)
{
    kern_return_t kret;

    if (host_cpu >= MAXIMUM_PROCESSORS)
	return;
    if (vector < 0xf0 || vector > 0xff)
	return;

    /* Already pending: the earlier signal covers this request. */
    if (test_and_set_bit(vector - 0xf0,
			 &hxen_cpu_ipi_raised_vectors[host_cpu]))
	return;

    kret = semaphore_signal(hxen_cpu_ipi_sem[host_cpu]);
    if (kret != KERN_SUCCESS)
	dprintk("hxen_cpu_ipi %d/%d semaphore_signal: %d\n",
		host_cpu, vector, kret);
}

/*
 * Raise IPI `vector` on the vcpu behind vis (installed as
 * hxen_info->ki_kick_vcpu).
 *
 * vis is cast to the enclosing struct vm_info.  Vectors outside 0xf0..0xff
 * are ignored.  The vcpu IPI thread is only signalled when the vector's bit
 * was not already set in vi_raised_vectors.
 */
static void __cdecl
hxen_vcpu_ipi(struct vm_info_shared *vis, unsigned int vector)
{
    struct vm_info *vi = (struct vm_info *)vis;
    kern_return_t kret;

    if (vector < 0xf0 || vector > 0xff)
	return;

    /* Already pending: the earlier signal covers this request. */
    if (test_and_set_bit(vector - 0xf0, &vi->vi_raised_vectors))
	return;

    kret = semaphore_signal(vi->vi_ipi_sem);
    if (kret != KERN_SUCCESS)
	dprintk("hxen_vcpu_ipi %p/%d semaphore_signal: %d\n",
		vi, vector, kret);
}

/*
 * Body of the per-host-CPU idle thread; context carries the cpu number.
 *
 * With an exception registration record hooked for its whole lifetime, the
 * thread blocks on hxen_idle_sem[cpu] (retrying while the wait is aborted)
 * and runs hxen_do_softirq() after each wakeup while the hypervisor is
 * running.  Running on a different cpu is only logged, not corrected.
 */
static void
hxen_idle_thread_fn(void *context)
{
    DECLARE_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    unsigned int cpu = (unsigned int)(uintptr_t)context;
    kern_return_t wait_ret;

    hxen_cpu_pin(cpu);
    dprintk("hxen_idle_thread_fn %d on cpu %d\n", cpu,
	    xnu_get_cpu_number());

    HOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec, NULL);

    while (!hxen_shutting_down) {
	/* Sleep until signalled; an aborted wait is simply retried. */
	do {
	    wait_ret = semaphore_wait(hxen_idle_sem[cpu]);
	    if (wait_ret != KERN_SUCCESS)
		dprintk("hxen_idle_thread_fn %d semaphore_wait: %d\n",
			cpu, wait_ret);
	} while (wait_ret == KERN_ABORTED);
	if (wait_ret != KERN_SUCCESS)
	    break;

	if (xnu_get_cpu_number() != cpu)
	    dprintk("hxen_idle_thread_fn run %d on %d\n", cpu,
		    xnu_get_cpu_number());

	if (hxen_info == NULL || !hxen_info->ki_running)
	    continue;
	hxen_do_softirq();
    }

    UNHOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    dprintk("hxen_idle_thread_fn %d on cpu %d ==> exit\n", cpu,
	    xnu_get_cpu_number());
}

/*
 * Wake the idle thread of host_cpu (installed as
 * hxen_info->ki_signal_idle_thread).
 *
 * Nothing happens for a cpu number outside the per-CPU tables or for a cpu
 * that has no idle thread recorded.  A failing semaphore_signal() is logged.
 */
static void __cdecl
hxen_signal_idle_thread(unsigned int host_cpu)
{
    kern_return_t kret;

    /* Same range guard as hxen_cpu_ipi(): never index past the tables. */
    if (host_cpu >= MAXIMUM_PROCESSORS)
	return;

    if (hxen_idle_thread[host_cpu]) {
	kret = semaphore_signal(hxen_idle_sem[host_cpu]);
	if (kret != KERN_SUCCESS)
	    dprintk("hxen_signal_idle_thread %d semaphore_signal: %d\n",
		    host_cpu, kret);
    }
}

/*
 * Disable host preemption when `disable` is non-zero, enable it otherwise
 * (installed as hxen_info->ki_set_host_preemption).
 */
static void __cdecl
hxen_set_host_preemption(unsigned int disable)
{
    if (!disable) {
	xnu_enable_preemption();
	return;
    }

    xnu_disable_preemption();
}

/*
 * Host-CPU timer callback registered through hxen_cpu_timer_setup().
 *
 * Forwards to hxen_info->ki_timer_interrupt(), wrapped in an exception
 * registration record, but only while the hypervisor is marked running and
 * has supplied that handler.  Neither parameter is used.
 */
static void
hxen_cpu_timer_cb(void *param0, void *param1)
{
    DECLARE_EXCEPTION_REGISTRATION_RECORD(hxen_rec);

    if (!hxen_info || !hxen_info->ki_running ||
	!hxen_info->ki_timer_interrupt)
	return;

    HOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec, NULL);
    hxen_info->ki_timer_interrupt();
    UNHOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
}

/*
 * Per-vcpu timer callback registered through hxen_timer_setup(); param0 is
 * the struct vm_info of the VM, param1 is unused.
 *
 * Forwards to hxen_info->ki_timer_interrupt_vcpu() with the VM's shared
 * info, wrapped in an exception registration record hooked with the vcpu,
 * but only while the hypervisor is running and has supplied that handler.
 */
static void
hxen_vcpu_timer_cb(void *param0, void *param1)
{
    DECLARE_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    struct vm_info *vi = (struct vm_info *)param0;

    if (!hxen_info || !hxen_info->ki_running ||
	!hxen_info->ki_timer_interrupt_vcpu)
	return;

    HOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec, vi->vi_shared.vcpu);
    hxen_info->ki_timer_interrupt_vcpu(&vi->vi_shared);
    UNHOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
}

/*
 * Arm the timer of host_cpu to fire `expire` ticks from now (installed as
 * hxen_info->ki_set_timer).
 *
 * `expire` is relative; it is added to mach_absolute_time() and the
 * resulting deadline is recorded in hxen_cpu_timer_deadline[host_cpu].
 * When called on host_cpu itself the timer is entered directly with
 * preemption disabled; otherwise that cpu's IPI thread is signalled, which
 * calls hxen_cpu_timer_call_check() on its own cpu.
 *
 * A host_cpu outside the per-CPU tables is ignored, matching the guard in
 * hxen_cpu_ipi(), so the tables are never indexed out of bounds.
 */
static void __cdecl
hxen_set_cpu_timer(unsigned int host_cpu, uint64_t expire)
{
    uint64_t now = mach_absolute_time();
    kern_return_t kret;

    if (host_cpu >= MAXIMUM_PROCESSORS)
	return;

    expire += now;
#if 0
    if (expire < hxen_cpu_timer_deadline[host_cpu])
	dprintk("hxen_set_cpu_timer %d backwards expires %08X%08X "
		"was %08X%08X\n", host_cpu,
		(uint32_t)(expire>>32), (uint32_t)expire,
		(uint32_t)(hxen_cpu_timer_deadline[host_cpu]>>32),
		(uint32_t)(hxen_cpu_timer_deadline[host_cpu]));
#endif
    /* Only possible when the unsigned addition above wrapped around. */
    if (expire < now)
	dprintk("hxen_set_cpu_timer %d past expires %08X%08X "
		"now %08X%08X\n", host_cpu,
		(uint32_t)(expire>>32), (uint32_t)expire,
		(uint32_t)(now>>32), (uint32_t)now);
    hxen_cpu_timer_deadline[host_cpu] = expire;
    if (xnu_get_cpu_number() == host_cpu) {
	/* Restore preemption only if it was enabled when we got here. */
	int enabled = xnu_disable_preemption();
	hxen_cpu_timer_call_enter(host_cpu, expire);
	if (enabled)
	    xnu_enable_preemption();
    } else {
	dprintk("hxen_set_cpu_timer cpu %d != %d\n", xnu_get_cpu_number(),
		host_cpu);
	kret = semaphore_signal(hxen_cpu_ipi_sem[host_cpu]);
	if (kret != KERN_SUCCESS)
	    dprintk("hxen_set_cpu_timer %d semaphore_signal: %d\n",
		    host_cpu, kret);
    }
}

/*
 * Arm the timer of the vcpu behind vis to fire `expire` ticks from now
 * (installed as hxen_info->ki_set_timer_vcpu).
 *
 * The absolute deadline (now + expire) is stored in vi->vi_timer_deadline.
 * The timer is only entered when the caller runs on the vcpu's host cpu;
 * from any other cpu just a diagnostic is logged and no kick is sent.
 */
static void __cdecl
hxen_set_vcpu_timer(struct vm_info_shared *vis, uint64_t expire)
{
    const uint64_t now = mach_absolute_time();
    const uint64_t deadline = expire + now;
    struct vm_info *vi = (struct vm_info *)vis;
    int was_enabled;

    /* Only possible when the unsigned addition wrapped around. */
    if (deadline < now)
	dprintk("hxen_set_vcpu_timer %p past expires %08X%08X "
		"now %08X%08X\n", vis,
		(uint32_t)(deadline>>32), (uint32_t)deadline,
		(uint32_t)(now>>32), (uint32_t)now);
    vi->vi_timer_deadline = deadline;

    if (xnu_get_cpu_number() != vi->vi_host_cpu) {
	dprintk("hxen_set_vcpu_timer cpu %d != %d                  XXXXXXX\n",
		xnu_get_cpu_number(), vi->vi_host_cpu);
	return;
    }

    /* Restore preemption only if it was enabled when we got here. */
    was_enabled = xnu_disable_preemption();
    hxen_timer_call_enter(vi->vi_timer, deadline);
    if (was_enabled)
	xnu_enable_preemption();
}

/*
 * Return the host time handed to the hypervisor (hxen_info->ki_get_hostime):
 * mach_absolute_time() shifted right by 7 bits.
 */
static uint64_t __cdecl
hxen_get_hostime(void)
{
    uint64_t abs_ticks = mach_absolute_time();

    return abs_ticks >> 7;
}

/*
 * Return the calendar time in whole seconds, rounded up when more than
 * half a second of the current second has elapsed
 * (hxen_info->ki_get_unixtime).
 */
static uint32_t __cdecl
hxen_get_unixtime(void)
{
    uint32_t secs, microsecs;

    clock_get_calendar_microtime(&secs, &microsecs);

    return (microsecs > (USEC_PER_SEC / 2)) ? secs + 1 : secs;
}

/*
 * Block the calling thread on the VM's halt semaphore
 * (hxen_info->ki_halt_vm).  An aborted wait is retried; any other result,
 * success included, ends the wait.  Non-success results are logged.
 */
static void __cdecl
hxen_halt_vm(struct vm_info_shared *vis)
{
    struct vm_info *vi = (struct vm_info *)vis;
    kern_return_t wait_ret;

    do {
	wait_ret = semaphore_wait(vi->vi_halt_sem);
	if (wait_ret != KERN_SUCCESS)
	    dprintk("hxen_halt_vm semaphore_wait ret %d\n", wait_ret);
    } while (wait_ret == KERN_ABORTED);
}

/*
 * Signal the VM's halt semaphore, releasing a thread blocked in
 * hxen_halt_vm() (hxen_info->ki_wake_vm).  A failing signal is logged.
 */
static void __cdecl
hxen_wake_vm(struct vm_info_shared *vis)
{
    struct vm_info *vi = (struct vm_info *)vis;
    kern_return_t sig_ret = semaphore_signal(vi->vi_halt_sem);

    if (sig_ret == KERN_SUCCESS)
	return;

    dprintk("hxen_wake_vm semaphore_signal ret %d\n", sig_ret);
}

/*
 * Return the number of the host cpu the caller is running on
 * (hxen_info->ki_host_processor_id).
 */
static int __cdecl
hxen_host_processor_id(void)
{
    int cpu = xnu_get_cpu_number();

    return cpu;
}

/*
 * Copy n bytes from kernel buffer `from` to user address `to` with
 * copyout() (hxen_info->ki_copy_to_user).
 *
 * Returns 0 on success, or n when copyout() reports any error.
 */
static unsigned long __cdecl
hxen_copy_to_user(void *to, const void *from, unsigned long n)
{
    if (copyout(from, (user_addr_t)(uintptr_t)to, n) != 0)
	return n;

    return 0;
}

/*
 * Copy n bytes from user address `from` into kernel buffer `to` with
 * copyin() (hxen_info->ki_copy_from_user).
 *
 * Returns 0 on success.  On any copyin() error the whole destination is
 * zero-filled and n is returned.
 */
static unsigned long __cdecl
hxen_copy_from_user(void *to, const void *from, unsigned long n)
{
    if (copyin((user_addr_t)(uintptr_t)from, to, n) == 0)
	return 0;

    /* Never leave stale kernel data in a buffer the caller may still read. */
    memset(to, 0, n);
    return n;
}

/*
 * Start a kernel thread running continuation(arg) and apply scheduling
 * policy to it.
 *
 * continuation/arg: entry point and its argument.
 * thread:           receives the thread handle.  The reference returned by
 *                   kernel_thread_start() is dropped before returning, so
 *                   the stored value must not be used as an owned reference.
 * importance:       precedence importance; values above 99 additionally
 *                   make the thread non-timeshare and give it a
 *                   non-preemptible time-constraint policy
 *                   (computation 100us, constraint 500us).
 *
 * Returns 0 on success, ENOMEM when the thread cannot be started, or EINVAL
 * when a policy cannot be applied (the thread keeps running in that case).
 *
 * The thread reference is held across every thread_policy_set() call and
 * released on the single exit path; dropping it first would allow the
 * thread to exit and be freed while its policy is still being set.
 */
static int
hxen_create_thread(void (*continuation)(void *), void *arg,
		   thread_t *thread, int importance)
{
    kern_return_t ret;
    thread_precedence_policy_data_t precedinfo;
    int error = 0;

    ret = kernel_thread_start((thread_continue_t)continuation, arg, thread);
    if (ret != KERN_SUCCESS) {
	fail_msg("kernel_thread_start");
	return ENOMEM;
    }

    precedinfo.importance = importance;
    ret = thread_policy_set(*thread, THREAD_PRECEDENCE_POLICY,
			    (thread_policy_t)&precedinfo,
			    THREAD_PRECEDENCE_POLICY_COUNT);
    if (ret != KERN_SUCCESS) {
	fail_msg("thread_policy_set THREAD_PRECEDENCE_POLICY");
	error = EINVAL;
	goto out;
    }

    if (importance > 99) {
	thread_extended_policy_data_t extinfo;
	thread_time_constraint_policy_data_t timeinfo;
	uint64_t abstime;

	extinfo.timeshare = false;
	ret = thread_policy_set(*thread, THREAD_EXTENDED_POLICY,
				(thread_policy_t)&extinfo,
				THREAD_EXTENDED_POLICY_COUNT);
	if (ret != KERN_SUCCESS) {
	    fail_msg("thread_policy_set THREAD_EXTENDED_POLICY");
	    error = EINVAL;
	    goto out;
	}

	timeinfo.period = 0;
	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &abstime);
	timeinfo.computation = abstime;
	nanoseconds_to_absolutetime(500 * NSEC_PER_USEC, &abstime);
	timeinfo.constraint = abstime;
	timeinfo.preemptible = false;
	ret = thread_policy_set(*thread, THREAD_TIME_CONSTRAINT_POLICY,
				(thread_policy_t)&timeinfo,
				THREAD_TIME_CONSTRAINT_POLICY_COUNT);
	if (ret != KERN_SUCCESS) {
	    fail_msg("thread_policy_set THREAD_TIME_CONSTRAINT_POLICY");
	    error = EINVAL;
	    goto out;
	}
    }

out:
    /* Drop the reference from kernel_thread_start() only once we are done. */
    thread_deallocate(*thread);
    return error;
}

void
hxen_init_free_allocs(void)
{
    if (vmaptable) {
	kernel_free(vmaptable, vmaptable_size);
	vmaptable = NULL;
    }
    if (frametable) {
	kernel_free(frametable, frametable_size);
	frametable = NULL;
    }
    if (xenheap_pages) {
	kernel_free(xenheap_pages, hxen_info->ki_xenheap_page_count);
	xenheap_pages = NULL;
    }
    if (xenheap_va) {
	kernel_free(xenheap_va, hxen_info->ki_xenheap_page_count);
	xenheap_va = NULL;
    }
}

static int
hxen_init_timers_and_threads(void)
{
    int ret;
    kern_return_t kret;
    unsigned int host_cpu;

    for (host_cpu = 0; host_cpu < MAXIMUM_PROCESSORS; host_cpu++) {
	hxen_idle_thread[host_cpu] = NULL;
	hxen_cpu_ipi_thread[host_cpu] = NULL;

	if ((hxen_info->ki_cpu_active_mask & ((uintptr_t)1 << host_cpu)) == 0)
	    continue;

	dprintk("creating threads on cpu %d\n", host_cpu);
	dprintk("  [%d] idle\n", host_cpu);

	kret = semaphore_create(kernel_task, &hxen_idle_sem[host_cpu],
				0, 0);
	if (kret != KERN_SUCCESS) {
            fail_msg("semaphore_create idle %d: %d\n", kret);
	    return ENOMEM;
	}

	ret = hxen_create_thread(hxen_idle_thread_fn, (void *)host_cpu,
				 &hxen_idle_thread[host_cpu], 0);
	if (ret) {
	    fail_msg("hxen_create_thread idle %d", host_cpu);
	    return ret;
	}

	dprintk("  [%d] cpu ipi\n", host_cpu);

	kret = semaphore_create(kernel_task, &hxen_cpu_ipi_sem[host_cpu],
				0, 0);
	if (kret != KERN_SUCCESS) {
	    fail_msg("semaphore_create cpu ipi %d: %d\n", kret);
	    return ENOMEM;
	}

	ret = hxen_create_thread(hxen_cpu_ipi_thread_fn, (void *)host_cpu,
				 &hxen_cpu_ipi_thread[host_cpu],
				 KXEN_IPI_THREAD_PRIO);
	if (ret) {
	    fail_msg("hxen_create_thread cpu ipi %d", host_cpu);
	    return ret;
	}

	dprintk("  [%d] timer\n", host_cpu);
	hxen_cpu_timer_setup(host_cpu, hxen_cpu_timer_cb);
    }

    return 0;
}

/*
 * One-time driver initialisation: publish the host callbacks in hxen_info,
 * allocate the xen heap, vmap table and frame table, create the per-cpu
 * threads and timers, and start the hypervisor via hxen_start_xen().
 *
 * Returns 0 on success or an errno value.  On failure the buffers allocated
 * here are released through hxen_init_free_allocs(); threads, semaphores
 * and timers already created by hxen_init_timers_and_threads() are not torn
 * down in this function.
 */
int
hxen_init(void)
{
    DECLARE_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    /* Default for every "goto out" taken by a failed allocation below. */
    int ret = ENOMEM;
    unsigned int heap_pages;

    hxen_shutting_down = FALSE;

    dprintk("vvvvvvvvvvvvvvvvv\n"
	    "ldhxen %p; gdb hxen\n"
	    "^^^^^^^^^^^^^^^^^\n", hxen_hv);

    /* Host-side services and parameters handed to the hypervisor. */
    hxen_info->ki_printf = hxen_dprintk;
    hxen_info->ki_max_page = MAX_MACHINE_FRAME - 1;
    hxen_info->ki_kick_cpu = hxen_cpu_ipi;
    hxen_info->ki_kick_vcpu = hxen_vcpu_ipi;
    hxen_info->ki_signal_idle_thread = hxen_signal_idle_thread;
    hxen_info->ki_set_host_preemption = hxen_set_host_preemption;
    hxen_info->ki_set_timer = hxen_set_cpu_timer;
    hxen_info->ki_set_timer_vcpu = hxen_set_vcpu_timer;
    hxen_info->ki_get_hostime = hxen_get_hostime;
    hxen_info->ki_get_unixtime = hxen_get_unixtime;
    hxen_info->ki_cpu_active_mask = 0;
    hxen_info->ki_hostime_tick_frequency = KXEN_HOST_TIME_FREQUENCY;
    hxen_info->ki_halt_vm = hxen_halt_vm;
    hxen_info->ki_wake_vm = hxen_wake_vm;
    hxen_info->ki_on_each_cpu = hxen_on_each_cpu;
    hxen_info->ki_map_pages = hxen_map_pages;
    hxen_info->ki_unmap_pages = hxen_unmap_pages;
    hxen_info->ki_set_current = hxen_set_current;
    hxen_info->ki_get_current = hxen_get_current;
    hxen_info->ki_host_processor_id = hxen_host_processor_id;
    hxen_info->ki_copy_from_user = hxen_copy_from_user;
    hxen_info->ki_copy_to_user = hxen_copy_to_user;

    /* Fills the mask that decides which cpus get threads and timers. */
    hxen_set_cpu_active_mask(&hxen_info->ki_cpu_active_mask,
			     sizeof(hxen_info->ki_cpu_active_mask));

    /*
     * Despite its name heap_pages counts BYTES until the shift below turns
     * it into a page count.
     * NOTE(review): the byte total is held in an unsigned int and could
     * wrap for a large ki_max_page - confirm the value of MAX_MACHINE_FRAME.
     */
    heap_pages = KXEN_HEAP_SIZE;
    // Add in size of m2p, 1 entry per page, 32 bit entries.
    heap_pages += hxen_info->ki_max_page * 4;
    // Add in size of xenheap bitmap.
    heap_pages += (heap_pages + 7) / 8;
    heap_pages = (heap_pages + (PAGE_SIZE-1)) >> PAGE_SHIFT;
    xenheap_va = kernel_malloc(heap_pages << PAGE_SHIFT);
    if (xenheap_va == NULL)
	goto out;
    hxen_info->ki_xenheap_va = xenheap_va;

    hxen_info->ki_xenheap_page_count = heap_pages;
    xenheap_pages = kernel_get_mfn_list((uintptr_t)xenheap_va,
					hxen_info->ki_xenheap_page_count);
    if (xenheap_pages == NULL)
	goto out;
    hxen_info->ki_xenheap_pages = xenheap_pages;

    /* One struct hxen_vmap per page, size rounded up to whole pages. */
    vmaptable_size = hxen_info->ki_max_page * sizeof(struct hxen_vmap);
    vmaptable_size = ((vmaptable_size + PAGE_SIZE-1) & ~(PAGE_SIZE-1));
    vmaptable = kernel_malloc(vmaptable_size);
    if (vmaptable == NULL)
	goto out;
    memset(vmaptable, 0, vmaptable_size);
    hxen_info->ki_vmaptable = vmaptable;

    /* One page_info per page, rounded up; unlike vmaptable not zeroed here. */
    frametable_size = hxen_info->ki_max_page * hxen_sizeof_struct_page_info;
    frametable_size = ((frametable_size + PAGE_SIZE-1) & ~(PAGE_SIZE-1));
    frametable = kernel_malloc(frametable_size);
    if (frametable == NULL)
	goto out;
    hxen_info->ki_frametable = frametable;

    ret = hxen_init_timers_and_threads();
    if (ret != 0)
	goto out;

    /* From here on the threads and timer callbacks act on their wakeups. */
    hxen_info->ki_running = 1;

    /* hxen_start_xen() runs between pin_first/unpin with the record hooked. */
    hxen_cpu_pin_first();
    HOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec, NULL);
    hxen_start_xen();
    ret = 0;
    UNHOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    hxen_cpu_unpin();

    /*
     * Earliest time at which hxen_execute() proceeds.
     * NOTE(review): adds a nanosecond count to a mach_absolute_time() value;
     * this assumes both use the same unit - confirm for the target hardware.
     */
    hxen_ready_time = mach_absolute_time() + 2500ULL * 1000 * NSEC_PER_USEC;

    /* AAA per vcpu init */
    hxen_device->de_vm_info.vi_shared.vcpu = hxen_info->ki_dom0_current;
    hxen_device->de_vm_info.vi_host_cpu = hxen_cpu_vm;
    memset(&hxen_device->de_vm_info.vi_selinfo, 0,
	   sizeof(hxen_device->de_vm_info.vi_selinfo));

out:
    if (ret)
	hxen_init_free_allocs();
    return ret;
}

int
hxen_shutdown(void)
{
    unsigned int host_cpu;

    if (hxen_info)
	hxen_info->ki_running = 0;
    hxen_shutting_down = TRUE;

    hxen_execute_abort();

    for (host_cpu = 0; host_cpu < MAXIMUM_PROCESSORS; host_cpu++) {
	if (hxen_idle_thread[host_cpu] == NULL)
	    continue;

	semaphore_signal(hxen_idle_sem[host_cpu]);
	semaphore_destroy(kernel_task, hxen_idle_sem[host_cpu]);
	hxen_idle_sem[host_cpu] = NULL;
	semaphore_signal(hxen_cpu_ipi_sem[host_cpu]);
	semaphore_destroy(kernel_task, hxen_cpu_ipi_sem[host_cpu]);
	hxen_cpu_ipi_sem[host_cpu] = NULL;

	hxen_idle_thread[host_cpu] = NULL;
    }

    clock_delay_until(mach_absolute_time() + 1000ULL * 1000 * NSEC_PER_USEC);
    /* AAA ioemu event cleanup */

    /* AAA per VM */

    /* XXX per VM */
    if (hxen_device) {
	if (hxen_device->de_vm_info.vi_execute_mtx) {
	    hxen_lck_mtx_free(hxen_device->de_vm_info.vi_execute_mtx);
	    hxen_device->de_vm_info.vi_execute_mtx = NULL;
	}
	if (hxen_device->de_vm_info.vi_halt_sem) {
	    semaphore_destroy(kernel_task,
			      hxen_device->de_vm_info.vi_halt_sem);
	    hxen_device->de_vm_info.vi_halt_sem = NULL;
	}
	if (hxen_device->de_vm_info.vi_ipi_sem) {
	    semaphore_destroy(kernel_task,
			      hxen_device->de_vm_info.vi_ipi_sem);
	    hxen_device->de_vm_info.vi_ipi_sem = NULL;
	}
	hxen_free_vmappings(hxen_device);
    }

    return 0;
}

/*
 * Fill kvd with the driver's major/minor version and version tag.
 *
 * The tag buffer is zeroed first and at most sizeof(tag) - 1 bytes are
 * copied, so the result is always NUL-terminated even when
 * KXEN_DRIVER_VERSION_TAG is as long as or longer than the buffer (the tag
 * is truncated in that case).  Always returns 0.
 */
int
hxen_version(struct hxen_version_desc *kvd)
{

    kvd->kvd_driver_version_major = KXEN_DRIVER_VERSION_MAJOR;
    kvd->kvd_driver_version_minor = KXEN_DRIVER_VERSION_MINOR;
    memset(kvd->kvd_driver_version_tag, 0,
	   sizeof(kvd->kvd_driver_version_tag));
    /* Leave the last zeroed byte untouched as the terminator. */
    strncpy(kvd->kvd_driver_version_tag, KXEN_DRIVER_VERSION_TAG,
	    sizeof(kvd->kvd_driver_version_tag) - 1);

    return 0;
}

#if defined(KXEN_EXECUTE_THREAD)
/*
 * Body of the dedicated execute thread (only built when
 * KXEN_EXECUTE_THREAD is defined).
 *
 * The thread sleeps on &hxen_execute_thread until hxen_execute() wakes it,
 * runs the VM through hxen_try_call(hxen_run_vm, ...), stores the result in
 * hxen_execute_return and wakes the sleeper on &hxen_execute_return.  It
 * leaves the loop when the sleep is interrupted or the hypervisor is no
 * longer running.
 */
static void
hxen_execute_thread_fn(void *context)
{
    struct vm_info *vi;
    int error, ret;

    vi = &hxen_device->de_vm_info;

    hxen_cpu_pin_vcpu(vi);

    dprintk("hxen_execute_thread_fn ready on cpu %d\n", xnu_get_cpu_number());

    do {
	/* Wait for hxen_execute() (or hxen_execute_abort()) to wake us. */
	error = msleep(&hxen_execute_thread, NULL, PZERO | PCATCH,
		       "hxenexecute", NULL);
	if (error) {
	    dprintk("hxen_execute_thread_fn %d signal\n", vi->vi_host_cpu);
	    break;
	}
	if (!hxen_info || hxen_info->ki_running == 0) {
	    dprintk("hxen_execute_thread_fn exit after sleep\n");
	    /* ret is not published on this path: the break skips the wakeup. */
	    ret = -1;
	    break;
	}
	if (vi->vi_host_cpu != xnu_get_cpu_number())
	    dprintk("hxen_execute_thread_fn run %d on %d\n", vi->vi_host_cpu,
		    xnu_get_cpu_number());
	do {
	    ret = hxen_try_call(hxen_run_vm, &vi->vi_shared,
				vi->vi_shared.vcpu);
	    if (ret || (hxen_device->de_ioemu_request_event == NULL) ||
		(hxen_info->ki_running == 0))
		break;
	    /* Event handshake with ioemu (Windows API calls), compiled out. */
#if 0
	    KeClearEvent(hxen_device->de_ioemu_completed_event);
	    KeSetEvent(hxen_device->de_ioemu_request_event, 0, TRUE);
	    (void)KeWaitForSingleObject(hxen_device->de_ioemu_completed_event,
					Executive, KernelMode, FALSE, NULL);
#endif
	} while (hxen_info && hxen_info->ki_running);
	/* Publish the result and release the caller blocked in hxen_execute(). */
	hxen_execute_return = ret;
	wakeup(&hxen_execute_return);
    } while (hxen_info && hxen_info->ki_running);

    dprintk("hxen_execute_thread_fn exit\n");
    hxen_cpu_unpin();

    return;
}

/*
 * Run the VM through the dedicated execute thread (KXEN_EXECUTE_THREAD
 * build only).
 *
 * The first call creates the execute thread and delays until
 * hxen_ready_time.  Every call then wakes the thread and sleeps until it
 * publishes a result in hxen_execute_return.
 *
 * Returns that result, or -1 when the thread cannot be created or the
 * sleep is interrupted.
 */
int
hxen_execute(void)
{
    int sleep_err;

    if (!hxen_is_ready) {
	int rc = hxen_create_thread(hxen_execute_thread_fn, NULL,
				    &hxen_execute_thread, 10);

	if (rc != 0) {
	    fail_msg("hxen_create_thread execute");
	    return -1;
	}

	clock_delay_until(hxen_ready_time);
	hxen_is_ready = TRUE;
    }

    dprintk("hxen_execute start\n");
    wakeup(&hxen_execute_thread);

    sleep_err = msleep(&hxen_execute_return, NULL, PZERO | PCATCH,
		       "hxenexec", NULL);
    if (sleep_err != 0) {
	dprintk("hxen_execute signal %d\n", sleep_err);
	return -1;
    }

    dprintk("hxen_execute return %d\n", hxen_execute_return);
    return hxen_execute_return;
}
#else
int
hxen_execute(void)
{
    struct vm_info *vi;
    int ret, error;
    kern_return_t kret;

    vi = &hxen_device->de_vm_info;

    hxen_cpu_pin_vcpu(vi);

    if (hxen_is_ready == FALSE) {
	vi->vi_timer = hxen_timer_setup(hxen_vcpu_timer_cb, vi);

	dprintk("  [%p] vcpu ipi\n", vi);
	ret = hxen_create_thread(hxen_vcpu_ipi_thread_fn, vi,
				 &vi->vi_ipi_thread, KXEN_IPI_THREAD_PRIO);
	if (ret) {
	    fail_msg("hxen_create_thread vcpu ipi %p", vi);
	    return ret;
	}

	vi->vi_execute_mtx = hxen_lck_mtx_alloc_init();
	if (vi->vi_execute_mtx == NULL) {
	    fail_msg("hxen_execute hxen_lck_mtx_alloc_init failed");
	    return ENOMEM;
	}

	kret = semaphore_create(kernel_task, &vi->vi_halt_sem, 0, 0);
	if (kret != KERN_SUCCESS) {
	    fail_msg("hxen_execute semaphore_create halt: %d", kret);
	    return ENOMEM;
	}

	kret = semaphore_create(kernel_task, &vi->vi_ipi_sem, 0, 0);
	if (kret != KERN_SUCCESS) {
	    fail_msg("hxen_execute semaphore_create ipi: %d", kret);
	    return ENOMEM;
	}

	clock_delay_until(hxen_ready_time);
	hxen_is_ready = TRUE;
    }

    do {
	ret = hxen_try_call(hxen_run_vm, vi, vi->vi_shared.vcpu);
	if (ret || /* (hxen_device->de_ioemu_request_event == NULL) || */
	    (hxen_info->ki_running == 0))
	    break;
	vi->vi_ioreq_pending = 1;
	do {
	    lck_mtx_lock(vi->vi_execute_mtx);
	    if (!vi->vi_ioreq_pending) {
		lck_mtx_unlock(vi->vi_execute_mtx);
		break;
	    }
	    selwakeup((struct selinfo *)&vi->vi_selinfo);
	    error = msleep(&vi->vi_ioreq_pending, vi->vi_execute_mtx,
			   PZERO | PCATCH | PDROP, "hxenioreq", NULL);
	    if (error) {
		dprintk("hxen_execute sleep signal %d\n", error);
		ret = -1;
		break;
	    }
	} while (vi->vi_ioreq_pending);
#if 0
	KeClearEvent(hxen_device->de_ioemu_completed_event);
	KeSetEvent(hxen_device->de_ioemu_request_event, 0, TRUE);
	(void)KeWaitForSingleObject(hxen_device->de_ioemu_completed_event,
				    Executive, KernelMode, FALSE, NULL);
#endif
    } while (hxen_info && hxen_info->ki_running);

    if (!hxen_info || hxen_info->ki_running == 0)
	ret = -1;
    return ret;
}
#endif

static void
hxen_execute_abort(void)
{
    struct vm_info *vi;

    vi = &hxen_device->de_vm_info;
    wakeup(&vi->vi_ioreq_pending);
#if defined(KXEN_EXECUTE_THREAD)
    wakeup(&hxen_execute_thread);
    hxen_execute_return = -1;
    wakeup(&hxen_execute_return);
#endif
}

/*
 * Feed each character of the NUL-terminated string `keys` to
 * hxen_handle_keypress(), in order, with an exception registration record
 * hooked using hxen_info->ki_dom0_current.  Always returns 0.
 */
int
hxen_keyhandler(char *keys)
{
    DECLARE_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    char *key;

    HOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec, hxen_info->ki_dom0_current);

    for (key = keys; *key; key++)
	hxen_handle_keypress(*key);

    UNHOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec);

    return 0;
}

/*
 * Register the ioemu request/completed events for a device.
 *
 * Only a stub is built: it logs the call and returns EINVAL.  The region
 * under "#if 0" is an implementation written against Windows kernel APIs
 * and is never compiled.
 */
int
hxen_set_ioemu_events(struct hxen_ioemu_events_desc *kied,
		      struct hxen_device *devext)
{
#if 0
    NTSTATUS status;
    int ret;

    if (devext->de_ioemu_request_event || devext->de_ioemu_completed_event) {
	fail_msg("hxen_set_ioemu_events: cannot change ioemu events");
	return EINVAL;
    }

    status = ObReferenceObjectByHandle(kied->kied_requestEvent, SYNCHRONIZE,
				       *ExEventObjectType, UserMode,
				       &devext->de_ioemu_request_event,
				       NULL);
    if (!NT_SUCCESS(status)) {
	fail_msg("hxen_set_ioemu_events: cannot get ioemu request event");
	devext->de_ioemu_request_event = NULL;
	return EINVAL;
    }

    status = ObReferenceObjectByHandle(kied->kied_completedEvent, SYNCHRONIZE,
				       *ExEventObjectType, UserMode,
				       &devext->de_ioemu_completed_event,
				       NULL);
    if (!NT_SUCCESS(status)) {
	fail_msg("hxen_set_ioemu_events: cannot get ioemu completed event");
	if (devext->de_ioemu_request_event)
	    ObDereferenceObject(devext->de_ioemu_request_event);
	devext->de_ioemu_request_event = NULL;
	devext->de_ioemu_completed_event = NULL;
	return EINVAL;
    }

    return 0;
#else
    /* Not implemented in this build: always reported as invalid. */
    dprintk("hxen_set_ioemu_events\n");
    return EINVAL;
#endif
}
