
#include "hxen.h"
#include <xen/errno.h>
#include "hxen_call.h"
#include <hxen_ioctl.h>

#define KXEN_DEFINE_SYMBOLS_PROTO
#include <hxen_link.h>
KXEN_PROTOTYPES(extern)

/*
 * Base size of the Xen heap in bytes.  hxen_init() adds room for the m2p
 * table and the heap bitmap on top of this before converting the total to
 * a page count.
 */
#define KXEN_HEAP_SIZE (32 * 1024 * 1024)

/*
 * Tables allocated in hxen_init() and released by hxen_init_free_allocs().
 * The sizes are in bytes, rounded up to a multiple of PAGE_SIZE.
 */
static void *vmaptable = NULL;
static unsigned int vmaptable_size;
static void *frametable = NULL;
static unsigned int frametable_size;

/*
 * Per host CPU state, indexed by host CPU number.
 *
 * hxen_cpu_ipi_raised_vectors[] is a bitmask of pending IPI vectors (bit n
 * stands for vector 0xf0 + n) and is accessed under the matching
 * hxen_cpu_ipi_spinlock[] entry.  hxen_idle_thread[] holds a referenced
 * thread object for every CPU that got an idle thread and NULL otherwise.
 */
static KDPC hxen_cpu_ipi_dpc[MAXIMUM_PROCESSORS];
static KSPIN_LOCK hxen_cpu_ipi_spinlock[MAXIMUM_PROCESSORS];
static uint32_t hxen_cpu_ipi_raised_vectors[MAXIMUM_PROCESSORS];
static PETHREAD hxen_idle_thread[MAXIMUM_PROCESSORS];
static KEVENT hxen_idle_thread_event[MAXIMUM_PROCESSORS];
static KTIMER hxen_cpu_timer[MAXIMUM_PROCESSORS];
static KDPC hxen_cpu_timer_dpc[MAXIMUM_PROCESSORS];
/*
 * Cleared by hxen_init(); set by hxen_shutdown() and hxen_except_handler().
 * The idle threads leave their loop once this is TRUE.
 */
BOOLEAN hxen_shutting_down = FALSE;

/*
 * hxen_ready_time is computed at the end of hxen_init() as the current
 * system time plus TIME_MS(2500); the first call to hxen_execute() delays
 * until that time and then sets hxen_is_ready.
 */
static LARGE_INTEGER hxen_ready_time;
static BOOLEAN hxen_is_ready = FALSE;

/* Dereferenced in hxen_except_handler() to decide whether to break in. */
extern BOOLEAN *KdDebuggerEnabled;

int
hxen_except_handler(unsigned int code, struct _EXCEPTION_POINTERS *ep)
{
    if (hxen_info)
        hxen_info->ki_running = 0;
    hxen_shutting_down = TRUE;
    dprintk("ldhxen 0x%p; gdb hxen\n", hxen_hv);
    dprintk(".cxr 0x%p\n", ep->ContextRecord);
    if (*KdDebuggerEnabled)
	DbgBreakPoint();
    return EXCEPTION_EXECUTE_HANDLER;
}

/*
 * DPC routine for per-CPU IPIs queued by hxen_cpu_ipi().
 *
 * SystemArgument1 carries the host CPU number.  The set of raised vectors
 * for that CPU is fetched and cleared under the CPU's spinlock, then each
 * raised vector is dispatched from lowest to highest bit.  An exception in
 * one dispatch is reported through fail_msg() and does not stop the
 * remaining vectors from being dispatched.
 */
static void
hxen_cpu_ipi_cb(
    IN PKDPC Dpc,
    IN PVOID DeferredContext,
    IN PVOID SystemArgument1,
    IN PVOID SystemArgument2
    )
{
    DECLARE_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    unsigned int cpu = (unsigned int)(ULONG_PTR)SystemArgument1;
    unsigned int pending;

    UNREFERENCED_PARAMETER(Dpc);
    UNREFERENCED_PARAMETER(DeferredContext);
    UNREFERENCED_PARAMETER(SystemArgument2);

    HOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec, NULL);

    /* Take a private snapshot so dispatching happens outside the lock. */
    KeAcquireSpinLockAtDpcLevel(&hxen_cpu_ipi_spinlock[cpu]);
    pending = hxen_cpu_ipi_raised_vectors[cpu];
    hxen_cpu_ipi_raised_vectors[cpu] = 0;
    KeReleaseSpinLockFromDpcLevel(&hxen_cpu_ipi_spinlock[cpu]);

    while (pending != 0) {
        /* Lowest set bit; bit n stands for vector 0xf0 + n. */
        unsigned int bit = ffs(pending) - 1;

        pending &= ~(1 << bit);
        try {
            hxen_dispatch_ipi(bit + 0xf0);
        } except (KXEN_EXCEPTION_EXECUTE_HANDLER) {
            fail_msg("hxen_dispatch_ipi: exception: 0x%08X",
                     GetExceptionCode());
        }
    }

    UNHOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
}

/*
 * DPC routine for a per-VM IPI queued by hxen_vcpu_ipi().
 *
 * DeferredContext is the struct vm_info the DPC was initialized with and
 * SystemArgument1 carries the vector.  A zero vector is ignored; any other
 * value is handed to hxen_dispatch_ivi().  Exceptions raised by the
 * dispatch are reported through fail_msg().
 */
static void
hxen_vcpu_ipi_cb(
    IN PKDPC Dpc,
    IN PVOID DeferredContext,
    IN PVOID SystemArgument1,
    IN PVOID SystemArgument2
    )
{
    DECLARE_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    struct vm_info *vm = DeferredContext;
    unsigned int ivi = (unsigned int)(ULONG_PTR)SystemArgument1;

    UNREFERENCED_PARAMETER(Dpc);
    UNREFERENCED_PARAMETER(SystemArgument2);

    HOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec, vm->vi_shared.vcpu);

    if (ivi != 0) {
        try {
            hxen_dispatch_ivi((struct vm_info_shared *)vm, ivi);
        } except (KXEN_EXCEPTION_EXECUTE_HANDLER) {
            fail_msg("hxen_dispatch_ivi: exception: 0x%08X",
                     GetExceptionCode());
        }
    }

    UNHOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
}

/*
 * Raise IPI `vector` on `host_cpu`.
 *
 * Only vectors 0xf0..0xff and CPU numbers below MAXIMUM_PROCESSORS are
 * accepted; anything else is silently ignored.  The vector is recorded in
 * the CPU's raised-vector bitmask under its spinlock, and the CPU's IPI
 * DPC is queued only when the vector was not already pending.
 */
static void __cdecl
hxen_cpu_ipi(unsigned int host_cpu, unsigned int vector)
{
    KIRQL saved_irql;
    uint32_t mask;
    int newly_raised = 0;

    if (host_cpu >= MAXIMUM_PROCESSORS || vector < 0xf0 || vector > 0xff)
        return;

    mask = 1 << (vector - 0xf0);

    KeAcquireSpinLock(&hxen_cpu_ipi_spinlock[host_cpu], &saved_irql);
    if ((hxen_cpu_ipi_raised_vectors[host_cpu] & mask) == 0) {
        hxen_cpu_ipi_raised_vectors[host_cpu] |= mask;
        newly_raised = 1;
    }
    KeReleaseSpinLock(&hxen_cpu_ipi_spinlock[host_cpu], saved_irql);

    /* XXX insn barrier */
    if (newly_raised)
        KeInsertQueueDpc(&hxen_cpu_ipi_dpc[host_cpu],
                         (PVOID)(ULONG_PTR)host_cpu, NULL);
}

/*
 * Raise `vector` for the VM described by `vis` by queueing the VM's IPI
 * DPC; the vector travels in the DPC's first system argument and is
 * consumed by hxen_vcpu_ipi_cb().
 *
 * `vis` is cast to the enclosing struct vm_info, as the rest of this file
 * does.
 */
static void __cdecl
hxen_vcpu_ipi(struct vm_info_shared *vis, unsigned int vector)
{
    struct vm_info *vi = (struct vm_info *)vis;

    KeInsertQueueDpc(&vi->vi_ipi_dpc, (PVOID)(ULONG_PTR)vector, NULL);
}

void
hxen_idle_thread_fn(void *context)
{
    DECLARE_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    unsigned int host_cpu = (unsigned int)(ULONG_PTR)context;

    KeSetSystemAffinityThread((ULONG_PTR)1 << host_cpu);

    HOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec, NULL);

    while (hxen_shutting_down == FALSE) {
	(void)KeWaitForSingleObject(&hxen_idle_thread_event[host_cpu],
				    Executive, KernelMode, TRUE, NULL);
	KeClearEvent(&hxen_idle_thread_event[host_cpu]);
	try {
	    if (hxen_info->ki_running)
		hxen_do_softirq();
	} except (KXEN_EXCEPTION_EXECUTE_HANDLER) {
	    fail_msg("hxen_do_softirq: exception: 0x%08X", GetExceptionCode());
	}
    }

    UNHOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
}

/*
 * Wake the idle thread of `host_cpu` by signalling its event.
 * The CPU number is used as an array index without a range check.
 */
static void __cdecl
hxen_signal_idle_thread(unsigned int host_cpu)
{
    PKEVENT wakeup = &hxen_idle_thread_event[host_cpu];

    KeSetEvent(wakeup, 0, FALSE);
}

/*
 * Disable (disable != 0) or re-enable (disable == 0) host preemption on
 * the current CPU by raising the IRQL to DISPATCH_LEVEL or restoring the
 * IRQL saved by the matching disable call.
 *
 * The previous IRQL is kept in a per-CPU slot.  The slot is selected only
 * after the IRQL has been raised: picking the processor number before the
 * raise would let the thread migrate in between and store the value in
 * another CPU's slot.
 */
static void __cdecl
hxen_set_host_preemption(unsigned int disable)
{
    static KIRQL oldIrql[MAXIMUM_PROCESSORS];
    KIRQL irql;

    if (disable) {
        KeRaiseIrql(DISPATCH_LEVEL, &irql);
        /* At DISPATCH_LEVEL the current processor can no longer change. */
        oldIrql[KeGetCurrentProcessorNumber()] = irql;
    } else {
        KeLowerIrql(oldIrql[KeGetCurrentProcessorNumber()]);
    }
}

/*
 * DPC routine for the per-CPU timers armed by hxen_set_cpu_timer().
 *
 * Calls hxen_info->ki_timer_interrupt() when the hypervisor is running and
 * the hook is set.  None of the DPC arguments are used; the host CPU number
 * stored as the deferred context in hxen_init() is not needed here.
 */
static void
hxen_cpu_timer_cb(
    IN PKDPC Dpc,
    IN PVOID DeferredContext,
    IN PVOID SystemArgument1,
    IN PVOID SystemArgument2
    )
{
    DECLARE_EXCEPTION_REGISTRATION_RECORD(hxen_rec);

    UNREFERENCED_PARAMETER(Dpc);
    UNREFERENCED_PARAMETER(DeferredContext);
    UNREFERENCED_PARAMETER(SystemArgument1);
    UNREFERENCED_PARAMETER(SystemArgument2);

    if (hxen_info && hxen_info->ki_running && hxen_info->ki_timer_interrupt) {
        HOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec, NULL);
        try {
            hxen_info->ki_timer_interrupt();
        } except (KXEN_EXCEPTION_EXECUTE_HANDLER) {
            /*
             * NOTE(review): unconditional breakpoint, unlike
             * hxen_except_handler() which checks KdDebuggerEnabled first.
             */
            DbgBreakPoint();
        }
        UNHOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    }
}

/*
 * DPC routine for the per-VM timer armed by hxen_set_vcpu_timer().
 *
 * DeferredContext is the struct vm_info the DPC was initialized with.
 * Calls hxen_info->ki_timer_interrupt_vcpu() when the hypervisor is
 * running and the hook is set.  An exception raised by the hook is
 * reported through fail_msg(), matching the IPI DPC routines, instead of
 * being dropped silently.
 */
static void
hxen_vcpu_timer_cb(
    IN PKDPC Dpc,
    IN PVOID DeferredContext,
    IN PVOID SystemArgument1,
    IN PVOID SystemArgument2
    )
{
    DECLARE_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    struct vm_info *vi = (struct vm_info *)DeferredContext;

    UNREFERENCED_PARAMETER(Dpc);
    UNREFERENCED_PARAMETER(SystemArgument1);
    UNREFERENCED_PARAMETER(SystemArgument2);

    if (hxen_info && hxen_info->ki_running &&
        hxen_info->ki_timer_interrupt_vcpu) {
        HOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec, vi->vi_shared.vcpu);
        try {
            hxen_info->ki_timer_interrupt_vcpu((struct vm_info_shared *)vi);
        } except (KXEN_EXCEPTION_EXECUTE_HANDLER) {
            fail_msg("hxen_timer_interrupt_vcpu: exception: 0x%08X",
                     GetExceptionCode());
        }
        UNHOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    }
}

/*
 * Arm the one-shot timer of `host_cpu` to fire after `expire`, which is
 * converted with TIME_NANO()/TIME_RELATIVE() into a relative due time.
 *
 * The request is dropped for CPU numbers outside the per-CPU arrays and
 * for CPUs that have no idle thread.
 */
static void __cdecl
hxen_set_cpu_timer(unsigned int host_cpu, uint64_t expire)
{
    LARGE_INTEGER due;

    if (host_cpu >= MAXIMUM_PROCESSORS)
        return;
    if (hxen_idle_thread[host_cpu] == NULL)
        return;

    due.QuadPart = TIME_RELATIVE(TIME_NANO(expire));
    KeSetTimerEx(&hxen_cpu_timer[host_cpu], due, 0,
                 &hxen_cpu_timer_dpc[host_cpu]);
}

/*
 * Arm the one-shot timer of the VM described by `vis` to fire after
 * `expire`, converted with TIME_NANO()/TIME_RELATIVE() into a relative
 * due time.  The VM's timer DPC runs when the timer expires.
 */
static void __cdecl
hxen_set_vcpu_timer(struct vm_info_shared *vis, uint64_t expire)
{
    struct vm_info *vm = (struct vm_info *)vis;
    LARGE_INTEGER due;

    due.QuadPart = TIME_RELATIVE(TIME_NANO(expire));
    KeSetTimerEx(&vm->vi_timer, due, 0, &vm->vi_timer_dpc);
}

/*
 * Return the current host system time as reported by KeQuerySystemTime(),
 * reinterpreted as an unsigned 64-bit value.
 */
static uint64_t __cdecl
hxen_get_hostime(void)
{
    LARGE_INTEGER now;

    KeQuerySystemTime(&now);
    return (uint64_t)now.QuadPart;
}

/*
 * Return the current host time as seconds since the Unix epoch, truncated
 * to 32 bits.
 *
 * KeQuerySystemTime() counts 100-nanosecond intervals since 1601-01-01;
 * 116444736000000000 such intervals separate that date from 1970-01-01,
 * and 10000000 of them make one second.
 */
static uint32_t __cdecl
hxen_get_unixtime(void)
{
    LARGE_INTEGER now;

    KeQuerySystemTime(&now);

    now.QuadPart -= 116444736000000000LL;   /* rebase to 1970-01-01 */
    now.QuadPart /= 10000000;               /* 100ns units -> seconds */

    return (uint32_t)now.QuadPart;
}

/*
 * Block the calling thread until the VM described by `vis` is made
 * runnable again, then reset the runnable event.
 *
 * The device extension is recovered from the vm_info embedded in it as
 * de_vm_info.  The wait is alertable and its result is ignored.
 */
static void __cdecl
hxen_halt_vm(struct vm_info_shared *vis)
{
    struct device_extension *owner =
        CONTAINING_RECORD((struct vm_info *)vis, struct device_extension,
                          de_vm_info);

    (void)KeWaitForSingleObject(&owner->de_runnable, Executive, KernelMode,
                                TRUE, NULL);
    KeClearEvent(&owner->de_runnable);
}

/*
 * Signal the runnable event of the VM described by `vis`, releasing a
 * thread blocked in hxen_halt_vm().
 *
 * The device extension is recovered from the vm_info embedded in it as
 * de_vm_info.
 */
static void __cdecl
hxen_wake_vm(struct vm_info_shared *vis)
{
    struct device_extension *owner =
        CONTAINING_RECORD((struct vm_info *)vis, struct device_extension,
                          de_vm_info);

    KeSetEvent(&owner->de_runnable, 0, FALSE);
}

/*
 * Release the memory allocated by hxen_init(): the vmap table, the frame
 * table and the Xen heap.  The table pointers are reset to NULL, so the
 * tables are not freed twice on a repeated call.
 */
void
hxen_init_free_allocs(void)
{
    if (vmaptable != NULL) {
        kernel_free(vmaptable, vmaptable_size);
        vmaptable = NULL;
    }

    if (frametable != NULL) {
        kernel_free(frametable, frametable_size);
        frametable = NULL;
    }

    /* Called unconditionally, whether or not the heap was allocated. */
    kernel_free_heap();
}

/*
 * Initialize `dpc` to run `cb` with deferred context `arg`, target it at
 * processor `host_cpu` and give it high importance.
 *
 * Always returns STATUS_SUCCESS.
 */
static NTSTATUS
init_cpu_dpc(KDPC *dpc, unsigned int host_cpu,
             void (*cb)(KDPC *, void *, void *, void *), void *arg)
{
    CCHAR target = (CCHAR)host_cpu;

    KeInitializeDpc(dpc, cb, arg);
    KeSetTargetProcessorDpc(dpc, target);
    KeSetImportanceDpc(dpc, HighImportance);

    return STATUS_SUCCESS;
}

/*
 * Return the number of the processor the caller is currently running on,
 * as reported by KeGetCurrentProcessorNumber().
 */
static int __cdecl
hxen_host_processor_id(void)
{
    int cpu = KeGetCurrentProcessorNumber();

    return cpu;
}

/*
 * Bring up the hypervisor side of the driver.
 *
 * Steps, in order:
 *   1. fill in the host callback table in hxen_info;
 *   2. allocate the Xen heap, the vmap table and the frame table;
 *   3. for every active host CPU set up the IPI DPC, spinlock, idle
 *      thread, timer and timer DPC;
 *   4. mark the hypervisor running and call hxen_start_xen() on the first
 *      CPU under exception protection;
 *   5. set up the timer and IPI DPCs of the single VM in hxen_devext.
 *
 * Returns 0 when hxen_start_xen() completed, -1 when an allocation failed
 * or hxen_start_xen() raised an exception.  On a non-zero return the
 * allocations from step 2 are released again.
 *
 * NOTE(review): a failure to create or reference an idle thread only stops
 * the per-CPU loop; initialization then carries on with the remaining
 * steps.  Confirm whether that is intended.
 */
int
hxen_init(void)
{
    DECLARE_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    int ret = -1;
    KAFFINITY affinity;
    unsigned int host_cpu;
    unsigned int heap_pages;

    hxen_shutting_down = FALSE;

    dprintk("vvvvvvvvvvvvvvvvv\n"
            "ldhxen %p; gdb hxen\n"
            "^^^^^^^^^^^^^^^^^\n", hxen_hv);

    /* Host services the hypervisor calls back into. */
    hxen_info->ki_printf = hxen_dprintk;
    hxen_info->ki_max_page = MAX_MACHINE_FRAME;
    hxen_info->ki_kick_cpu = hxen_cpu_ipi;
    hxen_info->ki_kick_vcpu = hxen_vcpu_ipi;
    hxen_info->ki_signal_idle_thread = hxen_signal_idle_thread;
    hxen_info->ki_set_host_preemption = hxen_set_host_preemption;
    hxen_info->ki_set_timer = hxen_set_cpu_timer;
    hxen_info->ki_set_timer_vcpu = hxen_set_vcpu_timer;
    hxen_info->ki_get_hostime = hxen_get_hostime;
    hxen_info->ki_get_unixtime = hxen_get_unixtime;
    hxen_info->ki_cpu_active_mask = 0;
    hxen_info->ki_hostime_tick_frequency = KXEN_HOST_TIME_FREQUENCY;
    hxen_info->ki_halt_vm = hxen_halt_vm;
    hxen_info->ki_wake_vm = hxen_wake_vm;
    hxen_info->ki_on_each_cpu = hxen_on_each_cpu;
    hxen_info->ki_map_pages = hxen_map_pages;
    hxen_info->ki_unmap_pages = hxen_unmap_pages;
    hxen_info->ki_set_current = hxen_set_current;
    hxen_info->ki_get_current = hxen_get_current;
    hxen_info->ki_host_processor_id = hxen_host_processor_id;

    hxen_set_cpu_active_mask(&hxen_info->ki_cpu_active_mask,
                             sizeof(hxen_info->ki_cpu_active_mask));

    /* heap_pages holds a byte count until the final shift below. */
    heap_pages = KXEN_HEAP_SIZE;
    // Add in size of m2p, 1 entry per page, 32 bit entries.
    heap_pages += hxen_info->ki_max_page * 4;
    // Add in size of xenheap bitmap.
    heap_pages += (heap_pages + 7) / 8;
    heap_pages = (heap_pages + (PAGE_SIZE-1)) >> PAGE_SHIFT;
    hxen_info->ki_xenheap_va =
        kernel_malloc_heap(heap_pages, &hxen_info->ki_xenheap_pages);
    if (hxen_info->ki_xenheap_va == NULL)
        goto out;
    hxen_info->ki_xenheap_page_count = heap_pages;

    /* One struct hxen_vmap per machine page, rounded up to whole pages. */
    vmaptable_size = hxen_info->ki_max_page * sizeof(struct hxen_vmap);
    vmaptable_size = ((vmaptable_size + PAGE_SIZE-1) & ~(PAGE_SIZE-1));
    vmaptable = kernel_malloc(vmaptable_size);
    if (vmaptable == NULL)
        goto out;
    memset(vmaptable, 0, vmaptable_size);
    hxen_info->ki_vmaptable = vmaptable;

    /* One page_info per machine page; this table is not zeroed here. */
    frametable_size = hxen_info->ki_max_page * hxen_sizeof_struct_page_info;
    frametable_size = ((frametable_size + PAGE_SIZE-1) & ~(PAGE_SIZE-1));
    frametable = kernel_malloc(frametable_size);
    if (frametable == NULL)
        goto out;
    hxen_info->ki_frametable = frametable;

    affinity = KeQueryActiveProcessors();
    for (host_cpu = 0; host_cpu < MAXIMUM_PROCESSORS; host_cpu++) {
        HANDLE handle;
        NTSTATUS status;

        hxen_idle_thread[host_cpu] = NULL;

        /* Skip processors that are not active on this host. */
        if ((affinity & ((ULONG_PTR)1 << host_cpu)) == 0)
            continue;
        init_cpu_dpc(&hxen_cpu_ipi_dpc[host_cpu], host_cpu,
                     hxen_cpu_ipi_cb, NULL);
        KeInitializeSpinLock(&hxen_cpu_ipi_spinlock[host_cpu]);
        hxen_cpu_ipi_raised_vectors[host_cpu] = 0;

        KeInitializeEvent(&hxen_idle_thread_event[host_cpu],
                          NotificationEvent, FALSE);
        status = PsCreateSystemThread(&handle, 0, NULL, NULL, NULL,
                                      hxen_idle_thread_fn,
                                      (PVOID)(ULONG_PTR)host_cpu);
        if (!NT_SUCCESS(status)) {
            fail_msg("create idle thread failed: %x", status);
            break;
        }

        /* Keep an object reference for hxen_shutdown(); drop the handle. */
        status = ObReferenceObjectByHandle(handle, THREAD_ALL_ACCESS, NULL,
                                           KernelMode,
                                           &hxen_idle_thread[host_cpu], NULL);
        ZwClose(handle);
        if (!NT_SUCCESS(status)) {
            /* The format has two conversions: CPU number, then status. */
            fail_msg("get reference to idle thread %d faile: %x",
                     host_cpu, status);
            break;
        }

        KeInitializeTimer(&hxen_cpu_timer[host_cpu]);

        init_cpu_dpc(&hxen_cpu_timer_dpc[host_cpu], host_cpu,
                     hxen_cpu_timer_cb, (void *)(ULONG_PTR)host_cpu);
    }

    hxen_info->ki_running = 1;

    hxen_cpu_pin_first();
    HOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec, NULL);
    try {
        hxen_start_xen();
        ret = 0;
    } except (KXEN_EXCEPTION_EXECUTE_HANDLER) {
        ret = -1;
    }
    UNHOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    hxen_cpu_unpin();

    /* hxen_execute() delays its first run until this point in time. */
    KeQuerySystemTime(&hxen_ready_time);
    hxen_ready_time.QuadPart += TIME_MS(2500);

    /* XXX VM create done on dom0/vcpu0 */
    hxen_devext->de_vm_info.vi_shared.vcpu = hxen_info->ki_dom0_current;
    KeInitializeTimer(&hxen_devext->de_vm_info.vi_timer);
    KeInitializeDpc(&hxen_devext->de_vm_info.vi_timer_dpc,
                    hxen_vcpu_timer_cb, &hxen_devext->de_vm_info);
    KeSetImportanceDpc(&hxen_devext->de_vm_info.vi_timer_dpc, HighImportance);
    hxen_devext->de_vm_info.vi_host_cpu = hxen_cpu_vm;
    KeInitializeDpc(&hxen_devext->de_vm_info.vi_ipi_dpc,
                    hxen_vcpu_ipi_cb, &hxen_devext->de_vm_info);
    KeSetImportanceDpc(&hxen_devext->de_vm_info.vi_ipi_dpc, HighImportance);

out:
    if (ret)
        hxen_init_free_allocs();
    return ret;
}

/*
 * Stop the hypervisor side of the driver.
 *
 * Marks the hypervisor as not running, wakes every idle thread and waits
 * for it to exit, cancels the per-CPU and per-VM timers, releases the
 * ioemu event references, flushes queued DPCs and frees the VM mappings.
 *
 * hxen_devext is checked for NULL before every use; previously only the
 * final hxen_free_vmappings() call was guarded.
 *
 * Always returns 0.
 */
int
hxen_shutdown(void)
{
    unsigned int host_cpu;

    if (hxen_info)
        hxen_info->ki_running = 0;
    hxen_shutting_down = TRUE;

    for (host_cpu = 0; host_cpu < MAXIMUM_PROCESSORS; host_cpu++) {
        if (hxen_idle_thread[host_cpu] == NULL)
            continue;
        /* Wake the thread so it notices hxen_shutting_down and exits. */
        KeSetEvent(&hxen_idle_thread_event[host_cpu], 0, FALSE);
        KeCancelTimer(&hxen_cpu_timer[host_cpu]);
        KeWaitForSingleObject(hxen_idle_thread[host_cpu], Executive, KernelMode,
                              FALSE, NULL);
        ObDereferenceObject(hxen_idle_thread[host_cpu]);
        hxen_idle_thread[host_cpu] = NULL;
    }

    if (hxen_devext) {
        if (hxen_devext->de_ioemu_completed_event) {
            /* Release a thread waiting in hxen_execute(). */
            KeSetEvent(hxen_devext->de_ioemu_completed_event, 0, FALSE);
            ObDereferenceObject(hxen_devext->de_ioemu_completed_event);
            hxen_devext->de_ioemu_completed_event = NULL;
        }
        if (hxen_devext->de_ioemu_request_event) {
            ObDereferenceObject(hxen_devext->de_ioemu_request_event);
            hxen_devext->de_ioemu_request_event = NULL;
        }

        /* XXX per VM */
        KeCancelTimer(&hxen_devext->de_vm_info.vi_timer);
    }

    KeFlushQueuedDpcs();

    /* XXX per VM */
    if (hxen_devext)
        hxen_free_vmappings(hxen_devext);

    return 0;
}

/*
 * Fill `kvd` with the driver's major/minor version and version tag.
 *
 * The tag buffer is zeroed first and at most sizeof(tag) - 1 characters
 * are copied, so the tag is always NUL-terminated; a tag that does not fit
 * is truncated.
 *
 * Always returns 0.
 */
int
hxen_version(struct hxen_version_desc *kvd)
{

    kvd->kvd_driver_version_major = KXEN_DRIVER_VERSION_MAJOR;
    kvd->kvd_driver_version_minor = KXEN_DRIVER_VERSION_MINOR;
    memset(kvd->kvd_driver_version_tag, 0, sizeof(kvd->kvd_driver_version_tag));
    /* Leave the last byte untouched so the terminator survives. */
    strncpy(kvd->kvd_driver_version_tag, KXEN_DRIVER_VERSION_TAG,
            sizeof(kvd->kvd_driver_version_tag) - 1);

    return 0;
}

/*
 * Run the VM in hxen_devext on the calling thread.
 *
 * The first call delays until hxen_ready_time.  The thread is then pinned
 * for the VM and hxen_run_vm is invoked repeatedly through hxen_try_call.
 * After each successful run the ioemu request event is signalled and the
 * thread waits for the ioemu completed event before running again.
 *
 * The loop ends when a run returns non-zero, when no ioemu request event
 * is registered, or when hxen_info->ki_running is cleared.
 *
 * Returns the result of the last hxen_try_call.
 */
int
hxen_execute(void)
{
    struct vm_info *vm;
    int rc;

    if (hxen_is_ready == FALSE) {
        KeDelayExecutionThread(KernelMode, FALSE, &hxen_ready_time);
        hxen_is_ready = TRUE;
    }

    vm = &hxen_devext->de_vm_info;

    hxen_cpu_pin_vcpu(vm);

    for (;;) {
        rc = hxen_try_call(hxen_run_vm, vm, vm->vi_shared.vcpu);
        if (rc)
            break;
        if (hxen_devext->de_ioemu_request_event == NULL)
            break;
        if (hxen_info->ki_running == 0)
            break;

        /* Hand the request to ioemu and wait for its completion. */
        KeClearEvent(hxen_devext->de_ioemu_completed_event);
        KeSetEvent(hxen_devext->de_ioemu_request_event, 0, TRUE);
        (void)KeWaitForSingleObject(hxen_devext->de_ioemu_completed_event,
                                    Executive, KernelMode, FALSE, NULL);

        if (!hxen_info->ki_running)
            break;
    }

    hxen_cpu_unpin();

    return rc;
}

/*
 * Feed `num` key codes from `keys` to hxen_handle_keypress(), one at a
 * time and in order.
 *
 * Returns 0 when every key was handled, or -EEXCEPT when handling raised
 * an exception; the exception is reported through fail_msg() and the
 * remaining keys are skipped.
 */
int
hxen_keyhandler(char *keys, unsigned int num)
{
    DECLARE_EXCEPTION_REGISTRATION_RECORD(hxen_rec);
    unsigned int idx = 0;
    int status;

    HOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec, hxen_info->ki_dom0_current);

    try {
        while (idx < num) {
            hxen_handle_keypress(keys[idx]);
            idx++;
        }
        status = 0;
    } except (KXEN_EXCEPTION_EXECUTE_HANDLER) {
        fail_msg("hxen_keyhandler: exception: 0x%08X", GetExceptionCode());
        status = -EEXCEPT;
    }

    UNHOOK_EXCEPTION_REGISTRATION_RECORD(hxen_rec);

    return status;
}

/*
 * Register the ioemu request and completed events for `devext`.
 *
 * kied   - carries the two user-mode event handles.
 * devext - device extension that receives the referenced event objects.
 *
 * The events can be set only once: if either one is already registered
 * the call fails.  When the second handle cannot be referenced, the
 * reference taken on the first is dropped, so both fields end up NULL.
 *
 * Returns 0 on success and EINVAL (positive) on any failure.
 */
int
hxen_set_ioemu_events(struct hxen_ioemu_events_desc *kied,
                      struct device_extension *devext)
{
    NTSTATUS status;

    if (devext->de_ioemu_request_event || devext->de_ioemu_completed_event) {
        fail_msg("hxen_set_ioemu_events: cannot change ioemu events");
        return EINVAL;
    }

    status = ObReferenceObjectByHandle(kied->kied_requestEvent, SYNCHRONIZE,
                                       *ExEventObjectType, UserMode,
                                       &devext->de_ioemu_request_event,
                                       NULL);
    if (!NT_SUCCESS(status)) {
        fail_msg("hxen_set_ioemu_events: cannot get ioemu request event");
        devext->de_ioemu_request_event = NULL;
        return EINVAL;
    }

    status = ObReferenceObjectByHandle(kied->kied_completedEvent, SYNCHRONIZE,
                                       *ExEventObjectType, UserMode,
                                       &devext->de_ioemu_completed_event,
                                       NULL);
    if (!NT_SUCCESS(status)) {
        fail_msg("hxen_set_ioemu_events: cannot get ioemu completed event");
        /* The request event was referenced successfully above. */
        ObDereferenceObject(devext->de_ioemu_request_event);
        devext->de_ioemu_request_event = NULL;
        devext->de_ioemu_completed_event = NULL;
        return EINVAL;
    }

    return 0;
}
