//
// hvm.c - Hypervisor interface routines for the event channel
//         driver.
//
// Copyright (c) 2006 XenSource, Inc. - All rights reserved.
//

#include "xenevtchn.h"
#include "xsapi.h"
#include "hvm.h"
#include "evtchn.h"
#include "hypercall.h"
#include "xenhdrs/memory.h"
#include "xenhdrs/hvm_params.h"
#include "xenhdrs/sched.h"
#include "verinfo.h"
#include "scsiboot.h"
//#include "vbd_special.h"
#include "xscompat.h"

//
// Don't care about unreferenced formal parameters here
//
#pragma warning( disable : 4100 )

// Hypercall trampoline area.  InitHvm() sets this from
// GetHypercallPage() and resets it to NULL if the shared info page
// cannot be allocated.
hypercall_trap_gate *hypercall_page;
// Virtual address of the shared info page, obtained from
// XenevtchnAllocIoMemory() in InitHvm().
shared_info_t *HYPERVISOR_shared_info;
// Last IRQ passed to HvmSetCallbackIrq().  HvmResume() re-registers it
// with Xen when it is non-zero.
int HvmInterruptNumber;

// Physical address backing HYPERVISOR_shared_info.  HvmResume() passes
// its page frame number to AddPageToPhysmap().
static PHYSICAL_ADDRESS sharedInfoPhysAddr;

//
// HvmGetParameter - Read HVM parameter @param_nr for this domain.
//
// Returns the parameter value on success.  If the hypercall fails,
// its negative return code is returned (converted to ULONG_PTR).
//
ULONG_PTR
HvmGetParameter(int param_nr)
{
    struct xen_hvm_param a;
    LONG_PTR rc;

    a.domid = DOMID_SELF;
    a.index = param_nr;
    /* Sentinel, so that we can tell below whether the hypercall
       wrote a value back into the structure. */
    a.value = 0xf001dead;
    rc = HYPERVISOR_hvm_op(HVMOP_get_param, &a);
    if (rc < 0) {
        /* rc is pointer-sized; narrow it explicitly so that the
           argument matches the %d conversion on 64-bit builds. */
        TraceError (("Cannot get HVM parameter %d: %d.\n",
                     param_nr, (int)rc));
        return rc;
    }
    /* Horrible hack to cope with the transition from
       return parameters through the hypercall return
       value to returning them through an in-memory
       structure. */
    if (a.value != 0xf001dead)
        rc = (int)a.value;
    TraceLoud (("HVM param %d is %d.\n", param_nr, (int)rc));
    return rc;
}

static int
HvmSetParameter(int param_nr, unsigned long value)
{
    struct xen_hvm_param a;
    a.domid = DOMID_SELF;
    a.index = param_nr;
    a.value = value;
    return (int)HYPERVISOR_hvm_op(HVMOP_set_param, &a);
}

/* Register @irq as the HVM callback IRQ and return the result of the
   parameter-set hypercall. */
int
HvmSetCallbackIrq(int irq)
{
    /* Recorded before the hypercall is issued, so the value is kept
       whether or not the set succeeds. */
    HvmInterruptNumber = irq;

    return HvmSetParameter(HVM_PARAM_CALLBACK_IRQ, irq);
}

int
AddPageToPhysmap(unsigned long pfn,
                 unsigned space,
                 unsigned long offset)
{
    struct xen_add_to_physmap xatp;

    xatp.domid = DOMID_SELF;
    xatp.space = space;
    xatp.idx = offset;
    xatp.gpfn = pfn;

    return HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
}

/* Return the eax value reported by XenCpuid leaf 1.  Elsewhere in this
   file that value is treated as the Xen version, with the major number
   in the upper 16 bits and the minor number in the lower 16 bits. */
static ULONG
GetXenVersion(void)
{
    ULONG version, unused_b, unused_c, unused_d;

    XenCpuid(1, &version, &unused_b, &unused_c, &unused_d);

    return version;
}

static PVOID
GetHypercallPage(VOID)
{
    ULONG eax, ebx ='fool', ecx = 'beef', edx = 'dead',
        nr_hypercall_pages;
    PVOID res;
    unsigned i;

    if (!ChechXenHypervisor()) {
        TraceError (("cpuid says this isn't really Xen.\n"));
        return NULL;
    }

    XenCpuid(1, &eax, &ebx, &ecx, &edx);
    TraceVerbose (("Xen version %d.%d.\n", eax >> 16, eax & 0xffff));

    //
    // Get the number of hypercall pages and the MSR to use to tell the
    // hypervisor which guest physical pages were assigned.
    //

    XenCpuid(2, &nr_hypercall_pages, &ebx, &ecx, &edx);

    res = XmAllocateMemory(PAGE_SIZE * nr_hypercall_pages);

    if (res == NULL) {
        TraceError (("Cannot allocate %d pages for hypercall trampolines.\n", nr_hypercall_pages));
        return NULL;
    }

    //
    // For each page, get the guest physical address and pass it to 
    // the hypervisor.
    //
    // Note: The low 12 bits of the address is used to pass the index
    // of the page within the hypercall area.
    //

    for (i = 0; i < nr_hypercall_pages; i++)
    {
        PHYSICAL_ADDRESS gpa;

        gpa = MmGetPhysicalAddress(((PCHAR)res) + (i << PAGE_SHIFT));
        _wrmsr(ebx, gpa.LowPart | i, gpa.HighPart);
    }

    return res;
}

/* Log the Windows version details reported by XenutilGetVersionInfo.
 * This is sometimes used for its side-effect of priming the
 * XenutilGetVersionInfo cache. */
static VOID
PrintVersionInformation(VOID)
{
    RTL_OSVERSIONINFOEXW info;

    /* Nothing is done above PASSIVE_LEVEL.  This can happen when we
     * reinitialise for hibernation. */
    if (KeGetCurrentIrql() != PASSIVE_LEVEL)
        return;

    XenutilGetVersionInfo(&info);
    TraceNotice (("Windows %d.%d build %d platform %d SP %d.%d, suite %d, type %d.\n",
                  info.dwMajorVersion,
                  info.dwMinorVersion,
                  info.dwBuildNumber,
                  info.dwPlatformId,
                  info.wServicePackMajor,
                  info.wServicePackMinor,
                  info.wSuiteMask,
                  info.wProductType));
}

/* (Re-)establish our state with Xen: declare the guest word size, map
   the shared info page, restore the callback IRQ if one was set, and
   unplug the emulated devices unless safe mode is enabled.  Called
   directly from InitHvm() and registered there as an early suspend
   handler.  Both arguments are unused.

   Returns STATUS_SUCCESS, or STATUS_INSUFFICIENT_RESOURCES if the
   shared info page could not be mapped. */
static NTSTATUS
HvmResume(VOID *ignore, SUSPEND_TOKEN token)
{
    unsigned long shared_info_pfn;
    int try_xs_first;
    int rc;

    UNREFERENCED_PARAMETER(ignore);
    UNREFERENCED_PARAMETER(token);

#ifdef AMD64
    HvmSetParameter(HVM_PARAM_32BIT, 0);
#else
    HvmSetParameter(HVM_PARAM_32BIT, 1);
#endif

    /* The rules for how to map shared info are a little complicated.
       On Zurich and Geneva, you just map shared_info_compat, and
       shared_info_xs doesn't exist.  On Rio through to Orlando, you
       need to map _xs, and if you map _compat you'll go wrong
       (because Xen triggers Geneva PV-driver bug workarounds off of
       that, and they do the wrong thing now that the bugs have been
       fixed).  On Midnight Ride, the two shared_info pages do exactly
       the same thing (because support for running the Geneva drivers
       has been dropped).  For post-Midnight Ride releases, we'd like
       to drop _xs and just use the _compat version (because the
       numeric code for shared_info_xs is in a place which is likely
       to collide with Open Source Xen).  If the numeric code does get
       reused, the map hypercall will appear to succeed but will map
       the wrong thing.

       Midnight Ride corresponds to Xen 3.3.

       The rule is therefore:

       -- If Xen version is > 3.3, map the _compat page.
       -- Otherwise, try to map the _xs page.
       -- If that fails, map the _compat page.
    */
    try_xs_first = !(GetXenVersion() > 0x30003);
    shared_info_pfn =
        (unsigned long)(sharedInfoPhysAddr.QuadPart >> PAGE_SHIFT);

    rc = AddPageToPhysmap(shared_info_pfn,
                          try_xs_first ? XENMAPSPACE_shared_info_xs
                                       : XENMAPSPACE_shared_info_compat,
                          0);
    if (try_xs_first && rc != 0) {
        // We may be running on a hypervisor that doesn't support
        // the newer shared info struct, eg Geneva or before.  Try
        // again using the old hypercall parameter.
        TraceVerbose(("XENEVTCHN: AddPageToPhysmap/shared_info failed, trying shared_info_compat.\n"));
        rc = AddPageToPhysmap(shared_info_pfn,
                              XENMAPSPACE_shared_info_compat,
                              0);
    }

    if (rc != 0) {
        TraceError (("Failed to add shared info to physmap: %d.\n", rc));
        /* XXX error code */
        return STATUS_INSUFFICIENT_RESOURCES;
    }
    TraceInfo (("Mapped shared info.\n"));

    /* Re-register the callback IRQ if one has been recorded. */
    if (HvmInterruptNumber != 0)
        HvmSetParameter(HVM_PARAM_CALLBACK_IRQ, HvmInterruptNumber);

    if (!XenPVFeatureEnabled(DEBUG_HA_SAFEMODE))
        UnplugIoemu();

    return STATUS_SUCCESS;
}

/* Pass @build_nr to Xen through HVMOP_set_driver_version.  The
   hypercall result is not examined. */
static void
SetDriverVersion(ULONG build_nr)
{
    xen_hvm_set_driver_version_t request;

    request.build = build_nr;

    HYPERVISOR_hvm_op(HVMOP_set_driver_version, &request);
}

//
// InitHvm - Perform actions required to hook into Xen
//                   HVM interface.
//
// Sets up the hypercall page, reports the driver build number,
// allocates the shared info page and runs HvmResume() to map it.
//
// Returns STATUS_SUCCESS, or STATUS_INSUFFICIENT_RESOURCES if the
// hypercall page or the shared info page cannot be obtained.
//
NTSTATUS
InitHvm(void)
{
    /* Static so that the suspend handler is registered only once, even
       if InitHvm() runs again. */
    static struct SuspendHandler *sh;

    TraceVerbose (("InitHvm.\n"));

    /* This call has the side-effect of priming the version
     * XenutilGetVersionInfo cache.  Do not move it around
     * unnecessarily. */

    PrintVersionInformation();

    hypercall_page = GetHypercallPage();
    if (hypercall_page == NULL)
        return STATUS_INSUFFICIENT_RESOURCES;

    SetDriverVersion(BRANDING_BUILD_NR);

    //
    // Allocate and map the shared info page.
    //
    HYPERVISOR_shared_info = XenevtchnAllocIoMemory(PAGE_SIZE, &sharedInfoPhysAddr);
    if (!HYPERVISOR_shared_info) {
        TraceError (("Cannot allocate shared info page.\n"));
        hypercall_page = NULL;
        return STATUS_INSUFFICIENT_RESOURCES;
    }
    /* Pass the two 32-bit halves explicitly: handing the whole
       PHYSICAL_ADDRESS union to a varargs call does not reliably
       supply one argument per %x conversion. */
    TraceInfo(("HYPERVISOR_shared_info at %p (%x:%x)\n",
               HYPERVISOR_shared_info,
               sharedInfoPhysAddr.HighPart,
               sharedInfoPhysAddr.LowPart));

#if !defined(KXEN_GUEST)
    /* We're too early to need to worry about races with suspend */
    if (!sh)
        sh = EvtchnRegisterSuspendHandler(HvmResume, NULL, "HvmResume",
                                          SUSPEND_CB_EARLY);
#endif
    /* NOTE(review): the status returned by HvmResume() is not checked
       here; a failure to map shared info is only visible through the
       error it traces. */
    HvmResume(NULL, null_SUSPEND_TOKEN());

#if !defined(KXEN_GUEST)
    if (!AustereMode)
        InitDebugHelpers();
#endif

    return STATUS_SUCCESS;
}

//
// CleanupHvm - Return resources consumed by driver back to the system.
//
VOID
CleanupHvm(
    VOID
)
{
#if !defined(KXEN_GUEST)
    CleanupDebugHelpers();
#endif
    if (hypercall_page) {
        //
        // Clear registered hvm callback.
        //
        HvmSetCallbackIrq(0);
    }
}

/* Fill @gxt using HVMOP_get_time; if that returns a negative value,
   retry with HVMOP_get_time_compat.  Returns the result of whichever
   hypercall was issued last. */
static int
GetXenTime(xen_hvm_get_time_t *gxt)
{
    int status;

    status = (int)HYPERVISOR_hvm_op(HVMOP_get_time, gxt);
    if (status < 0)
        status = (int)HYPERVISOR_hvm_op(HVMOP_get_time_compat, gxt);

    return status;
}

/* Get Xen's idea of the current time, in hundreds of nanoseconds
 * since 1601.
 *
 * The value is built from the wallclock base published in the shared
 * info page plus the system time obtained from GetXenTime(), or from
 * the vcpu 0 time info if that hypercall is unavailable. */
ULONG64
HvmGetXenTime(void)
{
    xen_hvm_get_time_t gxt;
    uint32_t version, wc_nsec;
    uint64_t wc_sec;
    uint64_t system_time;
    uint64_t result;

    /* Read wc_sec, wc_nsec.  These give the UTC time at which Xen was
     * booted.  The pair is protected by a version counter: retry if
     * the counter was odd when sampled (an update was in progress) or
     * if it changed while we were reading, since either means the two
     * fields may not belong together. */
    do {
        version = HYPERVISOR_shared_info->wc_version;
        XsMemoryBarrier();
        wc_sec = HYPERVISOR_shared_info->wc_sec;
        wc_nsec = HYPERVISOR_shared_info->wc_nsec;
        XsMemoryBarrier();
    } while ((version & 1) != 0 ||
             version != HYPERVISOR_shared_info->wc_version);

    TraceInfo(("GetXenTime: wc_sec %I64x, nsec %x.\n",
               wc_sec, wc_nsec));
    /* Convert from unix epoch (1970) to Windows epoch (1601) */
    wc_sec += 11644473600ull;

    /* wc_sec + wc_nsec*1e-9 now gives the time at which the system
       was booted, relative to 1601.  Find out the ``system time''
       (i.e. the number of nanoseconds since we booted). */
    if (GetXenTime(&gxt) == 0) {
        system_time = gxt.now;
    } else {
        /* Couldn't use the new protocol for getting the system time,
           so fall back to the old PV one.  This is based on pulling
           an epoch out of shared info and then applying an offset
           based on rdtsc.  Unfortunately, Xen lies to us about the
           actual value of the TSC (because we're an HVM guest).  Just
           ignore it and use the epoch value without any correction.
           It's unlikely to be out by more than a few hundred
           milliseconds. */
        system_time = HYPERVISOR_shared_info->vcpu_info[0].time.system_time;
    }

    /* Nanoseconds -> 100ns units, then add the boot time in the same
       units. */
    result = (system_time + wc_nsec)/100;
    result += wc_sec * 10000000ull;

    return result;
}


/* Allocator state for the IO hole handed to XenevtchnInitIoHole():
   physical start, next unallocated physical address, and physical
   end of the hole. */
static PHYSICAL_ADDRESS io_hole_start, io_hole_next, io_hole_end;
/* Virtual addresses corresponding to io_hole_start and io_hole_next. */
static PVOID io_hole_va_start, io_hole_va_next;

/* Rewind the IO hole allocator to the start of the hole, so that
   everything previously handed out by XenevtchnAllocIoMemory() can be
   allocated again. */
VOID
XenevtchnShutdownIoHole(void)
{
    io_hole_next = io_hole_start;
    io_hole_va_next = io_hole_va_start;
}

/* Record the IO hole of @nbytes bytes at physical address @base,
   mapped at @base_va, and point the allocator at its start.  Only the
   first call has any effect: once a virtual start address has been
   recorded, later calls are ignored. */
VOID
XenevtchnInitIoHole(PHYSICAL_ADDRESS base, PVOID base_va, ULONG nbytes)
{
    if (io_hole_va_start == NULL) {
        io_hole_start = base;
        io_hole_next = base;
        io_hole_end.QuadPart = base.QuadPart + nbytes;

        io_hole_va_start = base_va;
        io_hole_va_next = base_va;
    }
}

#if !defined(KXEN_GUEST)
/* Carve @nr_bytes, rounded up to a whole number of pages, out of the
   IO hole.  On success the virtual address of the region is returned
   and its physical address is stored in *@pa.  If the hole does not
   have enough room left, NULL is returned and *@pa is zeroed.

   The allocator's next-free pointers are read and advanced while
   holding a function-local irq-safe lock. */
PVOID
XenevtchnAllocIoMemory(ULONG nr_bytes, PHYSICAL_ADDRESS *pa)
{
    static struct irqsafe_lock lock;
    PVOID va = NULL;
    KIRQL irql;

    /* Round the request up to a multiple of PAGE_SIZE. */
    nr_bytes = (nr_bytes + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);

    irql = acquire_irqsafe_lock(&lock);
    if (io_hole_next.QuadPart + nr_bytes <= io_hole_end.QuadPart) {
        /* Hand out the current position, then advance both the
           physical and the virtual cursor past the region. */
        *pa = io_hole_next;
        va = io_hole_va_next;
        io_hole_next.QuadPart += nr_bytes;
        io_hole_va_next = (PVOID)((ULONG_PTR)va + nr_bytes);
    } else {
        pa->QuadPart = 0;
    }
    release_irqsafe_lock(&lock, irql);

    if (va == NULL)
        TraceWarning (("Filled the io hole!\n"));

    return va;
}
#endif

/* Give Xen a hint that this vcpu isn't going to be doing anything
   productive for the next @ms milliseconds.  This only really makes
   sense in Austere mode; anywhere else, you'd use
   KeDelayExecutionThread().

   Returns early, without waiting the full interval, if the current
   time cannot be read from Xen. */
void
DescheduleVcpu(unsigned ms)
{
    xen_hvm_get_time_t gxt;
    sched_poll_t sp;
    int code;

    memset(&sp, 0, sizeof(sp));

    if (GetXenTime(&gxt) != 0)
        return;
    /* gxt.now is in nanoseconds; convert @ms to match. */
    sp.timeout = gxt.now + ms * 1000000ull;
    while (gxt.now < sp.timeout) {
        code = HYPERVISOR_sched_op(SCHEDOP_poll, &sp);
        if (code != 0)
            TraceNotice(("Failed to sleep %d\n", code));
        /* Without a fresh timestamp gxt.now would never advance and
           the loop could not terminate, so give up if the read fails,
           just as we do for the initial read above. */
        if (GetXenTime(&gxt) != 0)
            break;
    }
}

/* Tracing entry point: passes @level, the @module name (annotated as
   @module_size elements long), and the @fmt/@args pair straight on to
   ___XenTrace() without modification. */
void
__XenTrace(XenTraceLevel level, __in_ecount(module_size) PCSTR module,
           size_t module_size, PCSTR fmt, va_list args)
{
    ___XenTrace(level, module, module_size, fmt, args);
}
