commit d7b21abd2997f5f363a2f767f20007938df6e9d7
Author: Wei Liu <liuw@liuw.name>
Date:   Sat Jun 25 14:41:23 2011 +0800

    Virtio for Xen: a working transport layer
    
    The frontend just spins around a shared variable to see if backend has
    finished its job.
    
    We don't use bitops here, because backend lives in userspace, which
    has no access to bitops.

diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 3dd6294..65a437d 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -23,6 +23,21 @@ config VIRTIO_PCI
 
 	  If unsure, say M.
 
+config VIRTIO_XENBUS
+	tristate "Xenbus driver for virtio devices (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	select VIRTIO
+	select VIRTIO_RING
+	---help---
+	  This driver provides support for virtio based paravirtual device
+	  drivers over Xenbus.  This requires that your VMM has appropriate
+	  Xenbus virtio backends.
+
+	  Currently, the ABI is not considered stable so there is no guarantee
+	  that this version of the driver will work with your VMM.
+
+	  If unsure, say M.
+
 config VIRTIO_BALLOON
 	tristate "Virtio balloon driver (EXPERIMENTAL)"
 	select VIRTIO
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 6738c44..c3ac569 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_VIRTIO) += virtio.o
 obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
+obj-$(CONFIG_VIRTIO_XENBUS) += virtio_xenbus.o
 obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
diff --git a/drivers/virtio/virtio_xenbus.c b/drivers/virtio/virtio_xenbus.c
new file mode 100644
index 0000000..be2ab52
--- /dev/null
+++ b/drivers/virtio/virtio_xenbus.c
@@ -0,0 +1,785 @@
+/*
+ * Virtio Xenbus driver
+ *
+ * This module allows virtio devices to be used over Xenbus.
+ *
+ * Authors:
+ *  Wei Liu <liuw@liuw.name>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_xenbus.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/time.h>
+#include <linux/virtio_ids.h>
+#include <linux/spinlock.h>
+#include <linux/dma-mapping.h>
+
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/grant_table.h>
+#include <asm/xen/page.h>
+
+MODULE_AUTHOR("Wei Liu <liuw@liuw.name>");
+MODULE_DESCRIPTION("virtio-xenbus");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1");
+
+/* Shared page acting as the config-space transport.  The frontend
+ * stages a request (write/size/offset) here, sets be_active and kicks
+ * the backend; the backend processes the request and clears be_active
+ * (see __vx_wait).  Layout must match the userspace backend exactly. */
+struct virtio_config_page {
+	u8  config[256];	/* config space, shadow copy */
+	int write;		/* is write? */
+	int size;		/* operand size */
+	int offset;		/* offset in config space */
+	int be_active;		/* request pending; cleared by backend */
+};
+
+/* Per-device state: glue between a virtio_device and its Xenbus
+ * transport (grant reference, event channels, shared config page). */
+struct virtio_xenbus_device {
+	struct virtio_device vdev;
+	struct xenbus_device *xbdev;
+
+	int gref;			/* grant ref of config_page; -1 if none */
+	struct virtio_config_page *config_page;
+	spinlock_t irq_lock;		/* serializes config round-trips */
+	int conf_irq, notify_irq;	/* -1 when unbound */
+	evtchn_port_t conf_evtchn, notify_evtchn;
+	char phys[32];			/* "xenbus/<nodename>" identifier */
+
+	/* a list of queues so we can dispatch IRQs */
+	spinlock_t lock;
+	struct list_head virtqueues;
+};
+
+/* Bookkeeping for one active virtqueue. */
+struct virtio_xenbus_vq_info {
+	/* the actual virtqueue */
+	struct virtqueue *vq;
+
+	/* the number of entries in the queue */
+	int num;
+
+	/* the index of the queue */
+	int queue_index;
+
+	/* the virtual address of the ring queue */
+	void *queue;
+
+	/* dma handle of the ring queue */
+	dma_addr_t queue_dh;
+
+	/* the list node for the virtqueue list */
+	struct list_head node;
+};
+
+/*---------------------------------------------------------------------*/
+/* This read/write implementation emulates "trap-process-return"
+ * behavior.
+ *
+ * Every read/write is passed to backend. The driver internal states
+ * are visible. However, every operation must be acked by backend.
+ *
+ * It is worth noting that 'offset' also acts as 'cmd' in r/w
+ * operations.
+ */
+
+/* Kick the backend for the request currently staged in the config page
+ * and busy-poll until the backend clears page->be_active, with a ~2
+ * second overall timeout so a dead backend cannot wedge the caller
+ * forever.  Every config-space access funnels through here; irq_lock
+ * serializes concurrent requests. */
+void __vx_wait(struct virtio_xenbus_device *vx_dev)
+{
+	evtchn_port_t evtchn = vx_dev->conf_evtchn;
+	unsigned irq = vx_dev->conf_irq;
+	struct virtio_config_page *page = vx_dev->config_page;
+	s64 ns, ns_timeout;
+	struct timeval tv;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&vx_dev->irq_lock, irq_flags);
+	page->be_active = 1;
+
+	/* GO -- make the staged request visible before the kick. */
+	mb();
+
+	do_gettimeofday(&tv);
+	ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
+
+	notify_remote_via_evtchn(evtchn);
+
+	xen_clear_irq_pending(irq);
+
+	/* This gives us a chance to tell if our transport is
+	 * alive... */
+	while (page->be_active) {
+		xen_poll_irq_timeout(irq, jiffies + 3 * HZ);
+		xen_clear_irq_pending(irq);
+
+		do_gettimeofday(&tv);
+		ns = timeval_to_ns(&tv);
+		if (ns > ns_timeout) {
+			dev_err(&vx_dev->xbdev->dev,
+				"__vx_wait: virtio back not responding!!!\n");
+			/* Clear the flag ourselves so the next request
+			 * does not start out wedged. */
+			page->be_active = 0;
+			goto out;
+		}
+	}
+out:
+	/* Pair with the backend's writes of the response data. */
+	mb();
+	spin_unlock_irqrestore(&vx_dev->irq_lock, irq_flags);
+}
+
+/* Read one byte of config space via a backend round-trip.
+ * The config page is ordinary kernel memory (from __get_free_page),
+ * not MMIO, so use a plain volatile load rather than readb(), which
+ * expects an __iomem-annotated address. */
+u8 vxread8(struct virtio_xenbus_device *vx_dev, int offset)
+{
+	struct virtio_config_page *page = vx_dev->config_page;
+
+	page->write = 0;
+	page->offset = offset;
+	page->size = 1;
+	/* __vx_wait issues a full barrier before kicking the backend. */
+	__vx_wait(vx_dev);
+	/* By the time we get here, backend should've stored the
+	 * response in place. */
+	return *(volatile u8 *)&page->config[offset];
+}
+
+/* Read a 16-bit config value via a backend round-trip.  The config
+ * page is normal RAM, not MMIO, so a plain volatile load replaces
+ * readw() (which expects an __iomem cookie).  All ABI offsets for
+ * 16-bit fields are naturally aligned. */
+u16 vxread16(struct virtio_xenbus_device *vx_dev, int offset)
+{
+	struct virtio_config_page *page = vx_dev->config_page;
+
+	page->write = 0;
+	page->offset = offset;
+	page->size = 2;
+	/* __vx_wait issues a full barrier before kicking the backend. */
+	__vx_wait(vx_dev);
+	/* By the time we get here, backend should've stored the
+	 * response in place. */
+	return *(volatile u16 *)&page->config[offset];
+}
+
+/* Read a 32-bit config value via a backend round-trip.  The config
+ * page is normal RAM, not MMIO, so a plain volatile load replaces
+ * readl() (which expects an __iomem cookie). */
+u32 vxread32(struct virtio_xenbus_device *vx_dev, int offset)
+{
+	struct virtio_config_page *page = vx_dev->config_page;
+
+	page->write = 0;
+	page->offset = offset;
+	page->size = 4;
+	/* __vx_wait issues a full barrier before kicking the backend. */
+	__vx_wait(vx_dev);
+	/* By the time we get here, backend should've stored the
+	 * response in place. */
+	return *(volatile u32 *)&page->config[offset];
+}
+
+/* Write one byte of config space via a backend round-trip.  Plain
+ * volatile store instead of writeb(): the config page is normal RAM,
+ * not an __iomem mapping. */
+void vxwrite8(struct virtio_xenbus_device *vx_dev, int value, int offset)
+{
+	struct virtio_config_page *page = vx_dev->config_page;
+
+	page->write = 1;
+	page->offset = offset;
+	page->size = 1;
+	*(volatile u8 *)&page->config[offset] = value;
+	/* __vx_wait issues a full barrier before kicking the backend. */
+	__vx_wait(vx_dev);
+}
+
+/* Write a 16-bit config value via a backend round-trip.  Plain
+ * volatile store instead of writew(): the config page is normal RAM,
+ * not an __iomem mapping. */
+void vxwrite16(struct virtio_xenbus_device *vx_dev, int value, int offset)
+{
+	struct virtio_config_page *page = vx_dev->config_page;
+
+	page->write = 1;
+	page->offset = offset;
+	page->size = 2;
+	*(volatile u16 *)&page->config[offset] = value;
+	/* __vx_wait issues a full barrier before kicking the backend. */
+	__vx_wait(vx_dev);
+}
+
+/* Write a 32-bit config value via a backend round-trip.  Plain
+ * volatile store instead of writel(): the config page is normal RAM,
+ * not an __iomem mapping. */
+void vxwrite32(struct virtio_xenbus_device *vx_dev, int value, int offset)
+{
+	struct virtio_config_page *page = vx_dev->config_page;
+
+	page->write = 1;
+	page->offset = offset;
+	page->size = 4;
+	*(volatile u32 *)&page->config[offset] = value;
+	/* __vx_wait issues a full barrier before kicking the backend. */
+	__vx_wait(vx_dev);
+}
+
+static int virtio_xenbus_connect_backend(struct xenbus_device *dev,
+					 struct virtio_xenbus_device *vx_dev);
+static void virtio_xenbus_disconnect_backend(struct virtio_xenbus_device
+					     *vx_dev);
+
+/* Convert a generic virtio_device pointer back to our wrapper. */
+static struct virtio_xenbus_device *to_vx_device(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct virtio_xenbus_device, vdev);
+}
+
+/* virtio config->get_features(): fetch the 32-bit host feature mask
+ * from the backend. */
+static u32 vx_get_features(struct virtio_device *vdev)
+{
+	return vxread32(to_vx_device(vdev), VIRTIO_XENBUS_HOST_FEATURES);
+}
+
+/* virtio config->finalize_features() implementation: let the ring
+ * layer veto features it does not support, then push the negotiated
+ * guest mask to the backend. */
+static void vx_finalize_features(struct virtio_device *vdev)
+{
+	struct virtio_xenbus_device *vx_dev = to_vx_device(vdev);
+	/* Give virtio_ring a chance to accept features. */
+	vring_transport_features(vdev);
+
+	/* We only support 32 feature bits. */
+	BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1);
+	vxwrite32(vx_dev, vdev->features[0], VIRTIO_XENBUS_GUEST_FEATURES);
+}
+
+/* virtio config->get(): copy 'len' bytes out of the per-driver config
+ * area, one backend round-trip per byte. */
+static void vx_get(struct virtio_device *vdev, unsigned offset,
+		   void *buf, unsigned len)
+{
+	struct virtio_xenbus_device *vx_dev = to_vx_device(vdev);
+	const int base = VIRTIO_XENBUS_CONFIG(vx_dev) + offset;
+	u8 *dst = buf;
+	unsigned i;
+
+	for (i = 0; i < len; i++)
+		dst[i] = vxread8(vx_dev, base + i);
+}
+
+/* virtio config->set(): mirror image of vx_get() -- push 'len' bytes
+ * into the per-driver config area byte by byte. */
+static void vx_set(struct virtio_device *vdev, unsigned offset,
+		   const void *buf, unsigned len)
+{
+	struct virtio_xenbus_device *vx_dev = to_vx_device(vdev);
+	const int base = VIRTIO_XENBUS_CONFIG(vx_dev) + offset;
+	const u8 *src = buf;
+	unsigned i;
+
+	for (i = 0; i < len; i++)
+		vxwrite8(vx_dev, src[i], base + i);
+}
+
+/* config->get_status(): read the device status byte from the backend. */
+static u8 vx_get_status(struct virtio_device *vdev)
+{
+	return vxread8(to_vx_device(vdev), VIRTIO_XENBUS_STATUS);
+}
+
+/* config->set_status() implementation */
+static void vx_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct virtio_xenbus_device *vx_dev = to_vx_device(vdev);
+	/* We should never be setting status to 0, which means
+	 * 'reset'. If we are to reset the device, use vx_reset. */
+	BUG_ON(status == 0);
+	vxwrite8(vx_dev, status, VIRTIO_XENBUS_STATUS);
+}
+
+/* config->reset(): writing a zero status byte tells the backend to
+ * reset the device. */
+static void vx_reset(struct virtio_device *vdev)
+{
+	vxwrite8(to_vx_device(vdev), 0, VIRTIO_XENBUS_STATUS);
+}
+
+/* Virtqueue kick: write the queue's index into the notify register so
+ * the backend knows which ring to service. */
+static void vx_notify(struct virtqueue *vq)
+{
+	struct virtio_xenbus_vq_info *info = vq->priv;
+
+	vxwrite16(to_vx_device(vq->vdev), info->queue_index,
+		  VIRTIO_XENBUS_QUEUE_NOTIFY);
+}
+
+/* Handle a configuration change: tell the driver if it wants to know.
+ *
+ * Fix: container_of() applied to a NULL dev.driver pointer does not
+ * yield NULL (it yields NULL minus the member offset), so the original
+ * "if (drv && ...)" test could never catch an unbound device.  Test
+ * the raw driver pointer before converting it. */
+static irqreturn_t vx_config_changed(int irq, void *opaque)
+{
+	struct virtio_xenbus_device *vx_dev = opaque;
+	struct virtio_driver *drv;
+
+	if (!vx_dev->vdev.dev.driver)
+		return IRQ_HANDLED;
+
+	drv = container_of(vx_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	if (drv->config_changed)
+		drv->config_changed(&vx_dev->vdev);
+	return IRQ_HANDLED;
+}
+
+/* Notify all virtqueues on an interrupt: we have a single notify
+ * event channel for all rings, so fan the interrupt out to every
+ * registered virtqueue and report IRQ_HANDLED if any ring claimed it. */
+static irqreturn_t vx_vring_interrupt(int irq, void *opaque)
+{
+	struct virtio_xenbus_device *vx_dev = opaque;
+	struct virtio_xenbus_vq_info *info;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vx_dev->lock, flags);
+	list_for_each_entry(info, &vx_dev->virtqueues, node) {
+		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
+			ret = IRQ_HANDLED;
+	}
+	spin_unlock_irqrestore(&vx_dev->lock, flags);
+
+	return ret;
+}
+
+/* A small wrapper to also acknowledge the interrupt when it's handled.
+ * NOTE(review): vxread8() performs a full backend round-trip (which
+ * can spin up to ~2s in __vx_wait) from interrupt context -- confirm
+ * this is acceptable for this transport. */
+static irqreturn_t vx_interrupt(int irq, void *opaque)
+{
+	struct virtio_xenbus_device *vx_dev = opaque;
+	u8 isr;
+	irqreturn_t ret;
+
+	/* reading the ISR has the effect of also clearing it so it's very
+	 * important to save off the value. */
+	isr = vxread8(vx_dev, VIRTIO_XENBUS_ISR);
+
+	/* It's definitely not us if the ISR was not high */
+	if (!isr)
+		return IRQ_NONE;
+
+	/* Configuration change? Tell driver if it wants to know. */
+	if (isr & VIRTIO_XENBUS_ISR_CONFIG)
+		vx_config_changed(irq, opaque);
+
+	ret = vx_vring_interrupt(irq, opaque);
+
+	return ret;
+}
+
+/* Tear down a single virtqueue: unlink it from the interrupt dispatch
+ * list, deactivate it on the backend side and free its ring memory. */
+static void vx_del_vq(struct virtqueue *vq)
+{
+	struct virtio_xenbus_device *vx_dev = to_vx_device(vq->vdev);
+	struct virtio_xenbus_vq_info *info = vq->priv;
+	unsigned long flags, size;
+
+	spin_lock_irqsave(&vx_dev->lock, flags);
+	list_del(&info->node);
+	spin_unlock_irqrestore(&vx_dev->lock, flags);
+
+	/* Select the queue on the backend... */
+	vxwrite16(vx_dev, info->queue_index, VIRTIO_XENBUS_QUEUE_SEL);
+
+	vring_del_virtqueue(vq);
+
+	/* ...and deactivate it (PFN 0 == inactive). */
+	vxwrite32(vx_dev, 0, VIRTIO_XENBUS_QUEUE_PFN);
+
+	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_XENBUS_VRING_ALIGN));
+	/* NOTE(review): dma_free_coherent() with a NULL device relies on
+	 * legacy behaviour -- confirm this is intended. */
+	dma_free_coherent(NULL, size,
+			  info->queue, info->queue_dh);
+	kfree(info);
+}
+
+/* the config->del_vqs implementation: tear down every virtqueue that
+ * is still registered on this device. */
+static void vx_del_vqs(struct virtio_device *vdev)
+{
+	struct virtqueue *vq, *tmp;
+
+	list_for_each_entry_safe(vq, tmp, &vdev->vqs, list)
+		vx_del_vq(vq);
+}
+
+/* Allocate, activate and register virtqueue 'index'.
+ * Returns the new virtqueue or an ERR_PTR() on failure. */
+static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
+				  void (*callback)(struct virtqueue *vq),
+				  const char *name)
+{
+	struct virtio_xenbus_device *vx_dev = to_vx_device(vdev);
+	struct virtio_xenbus_vq_info *info;
+	struct virtqueue *vq;
+	unsigned long flags, size;
+	u16 num;
+	int err;
+
+	/* Select queue */
+	vxwrite16(vx_dev, index, VIRTIO_XENBUS_QUEUE_SEL);
+
+	/* Check if queue is either not available or already active. */
+	num = vxread16(vx_dev, VIRTIO_XENBUS_QUEUE_NUM);
+	if (!num || vxread32(vx_dev, VIRTIO_XENBUS_QUEUE_PFN))
+		return ERR_PTR(-ENOENT);
+
+	/* allocate and fill out our structure that represents an active
+	 * queue */
+	info = kmalloc(sizeof(struct virtio_xenbus_vq_info), GFP_KERNEL);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	info->queue_index = index;
+	info->num = num;
+
+	size = PAGE_ALIGN(vring_size(num, VIRTIO_XENBUS_VRING_ALIGN));
+	/* XXX liuw: the shared queue MUST be machine contiguous, so
+	 * use DMA */
+	info->queue = dma_alloc_coherent(NULL, size,
+					 &info->queue_dh,
+					 GFP_KERNEL|__GFP_ZERO);
+	if (info->queue == NULL) {
+		err = -ENOMEM;
+		goto out_info;
+	}
+
+	/* Activate the queue.  Use the transport's ABI shift constant
+	 * (always 12) rather than PAGE_SHIFT: the two differ on
+	 * configurations with non-4K pages, and both ends must agree on
+	 * how this field is encoded. */
+	vxwrite32(vx_dev, info->queue_dh >> VIRTIO_XENBUS_QUEUE_ADDR_SHIFT,
+		  VIRTIO_XENBUS_QUEUE_PFN);
+
+	/* create the vring */
+	vq = vring_new_virtqueue(info->num, VIRTIO_XENBUS_VRING_ALIGN,
+				 vdev, info->queue, vx_notify, callback, name);
+	if (!vq) {
+		err = -ENOMEM;
+		goto out_active_queue;
+	}
+
+	vq->priv = info;
+	info->vq = vq;
+
+	spin_lock_irqsave(&vx_dev->lock, flags);
+	list_add(&info->node, &vx_dev->virtqueues);
+	spin_unlock_irqrestore(&vx_dev->lock, flags);
+
+	return vq;
+
+out_active_queue:
+	/* Deactivate the queue again before freeing the ring. */
+	vxwrite32(vx_dev, 0, VIRTIO_XENBUS_QUEUE_PFN);
+	dma_free_coherent(NULL, size,
+			  info->queue, info->queue_dh);
+out_info:
+	kfree(info);
+	return ERR_PTR(err);
+}
+
+/* the config->find_vqs implementation: create nvqs virtqueues,
+ * rolling back all of them if any single one fails. */
+static int vx_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+		       struct virtqueue *vqs[],
+		       vq_callback_t *callbacks[],
+		       const char *names[])
+{
+	unsigned i;
+
+	for (i = 0; i < nvqs; i++) {
+		struct virtqueue *vq;
+
+		vq = setup_vq(vdev, i, callbacks[i], names[i]);
+		if (IS_ERR(vq)) {
+			vx_del_vqs(vdev);
+			return PTR_ERR(vq);
+		}
+		vqs[i] = vq;
+	}
+	return 0;
+}
+
+/* struct device release callback: runs when the last reference to the
+ * virtio device is dropped (e.g. after unregister_virtio_device) and
+ * frees the containing virtio_xenbus_device.  Nothing else may free
+ * vx_dev, or this becomes a double free. */
+static void virtio_xenbus_release_dev(struct device *_d)
+{
+	struct virtio_device *dev = container_of(_d, struct virtio_device,
+						 dev);
+	struct virtio_xenbus_device *vx_dev = to_vx_device(dev);
+
+	kfree(vx_dev);
+}
+
+/* virtio_config_ops vtable wiring the generic virtio core to this
+ * Xenbus transport. */
+static struct virtio_config_ops virtio_xenbus_config_ops = {
+	.get		= vx_get,
+	.set		= vx_set,
+	.get_status	= vx_get_status,
+	.set_status	= vx_set_status,
+	.reset		= vx_reset,
+	.find_vqs	= vx_find_vqs,
+	.del_vqs	= vx_del_vqs,
+	.get_features	= vx_get_features,
+	.finalize_features = vx_finalize_features,
+};
+
+/* Xenbus probe: allocate per-device state and the shared config page,
+ * then connect to the backend.  The virtio device itself is only
+ * registered later, from virtio_backend_changed(), once the backend
+ * reaches the Connected state. */
+static int __devinit virtio_xenbus_probe(struct xenbus_device *xbdev,
+					 const struct xenbus_device_id *id)
+{
+	int ret;
+	struct virtio_xenbus_device *vx_dev;
+
+	vx_dev = kzalloc(sizeof(*vx_dev), GFP_KERNEL);
+	if (!vx_dev) {
+		xenbus_dev_fatal(xbdev, -ENOMEM,
+				 "allocating virtio_xenbus_device");
+		return -ENOMEM;
+	}
+
+	/* fill in virtio device information */
+	vx_dev->vdev.dev.parent = &xbdev->dev;
+	vx_dev->vdev.dev.release = virtio_xenbus_release_dev;
+	vx_dev->vdev.config = &virtio_xenbus_config_ops;
+
+	/* Here we support multiple virtio device types, so it is
+	 * necessary to distinguish between them.  NOTE(review): an
+	 * unrecognised devicetype silently leaves the id zeroed --
+	 * confirm that is the intended behaviour. */
+	if (!strncmp(xbdev->devicetype, "virtio-blk", 10)) {
+		vx_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
+		vx_dev->vdev.id.device = VIRTIO_ID_BLOCK;
+	} else if (!strncmp(xbdev->devicetype, "virtio-net", 10)) {
+		vx_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
+		vx_dev->vdev.id.device = VIRTIO_ID_NET;
+	}
+
+	dev_set_drvdata(&xbdev->dev, vx_dev);
+	vx_dev->xbdev = xbdev;
+	/* -1 == "not allocated/bound"; checked in disconnect paths. */
+	vx_dev->notify_irq = -1;
+	vx_dev->conf_irq = -1;
+	vx_dev->notify_evtchn = -1;
+	vx_dev->conf_evtchn = -1;
+	vx_dev->gref = -1;
+	snprintf(vx_dev->phys, sizeof(vx_dev->phys), "xenbus/%s",
+		 xbdev->nodename);
+
+	INIT_LIST_HEAD(&vx_dev->virtqueues);
+	spin_lock_init(&vx_dev->lock);
+
+	spin_lock_init(&vx_dev->irq_lock);
+
+	vx_dev->config_page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	if (!vx_dev->config_page) {
+		ret = -ENOMEM;
+		xenbus_dev_fatal(xbdev, ret, "allocating device memory");
+		goto error_nomem;
+	}
+
+	ret = virtio_xenbus_connect_backend(xbdev, vx_dev);
+	if (ret < 0)
+		goto error;
+
+	return 0;
+
+error:
+	free_page((unsigned long)vx_dev->config_page);
+error_nomem:
+	dev_set_drvdata(&xbdev->dev, NULL);
+	return ret;
+}
+
+/* Xen suspend/resume: tear down the old transport, clear the shared
+ * page and rebuild the connection with the (possibly new) backend. */
+static int virtio_xenbus_resume(struct xenbus_device *xbdev)
+{
+	struct virtio_xenbus_device *vx_dev = dev_get_drvdata(&xbdev->dev);
+
+	virtio_xenbus_disconnect_backend(vx_dev);
+	memset(vx_dev->config_page, 0, PAGE_SIZE);
+	return virtio_xenbus_connect_backend(xbdev, vx_dev);
+}
+
+/* Xenbus remove: tear down the transport and the virtio device.
+ *
+ * Fix: the explicit kfree(vx_dev) was a double free --
+ * unregister_virtio_device() drops the last device reference, which
+ * invokes virtio_xenbus_release_dev() and that already frees vx_dev.
+ *
+ * NOTE(review): virtqueues are deleted after the backend transport is
+ * gone, so the config writes in vx_del_vq() will time out in
+ * __vx_wait(); the teardown ordering deserves a revisit. */
+static int virtio_xenbus_remove(struct xenbus_device *xbdev)
+{
+	struct virtio_xenbus_device *vx_dev = dev_get_drvdata(&xbdev->dev);
+
+	virtio_xenbus_disconnect_backend(vx_dev);
+
+	unregister_virtio_device(&vx_dev->vdev);
+
+	vx_del_vqs(&vx_dev->vdev);
+
+	free_page((unsigned long)vx_dev->config_page);
+
+	return 0;
+}
+
+/* IRQ handler for the config event channel.  Completion of a config
+ * request is detected by polling page->be_active in __vx_wait(), so
+ * there is nothing to do here beyond acknowledging the interrupt. */
+static irqreturn_t vx_conf_handler(int irq, void *data)
+{
+	return IRQ_HANDLED;
+}
+
+/* Establish the transport: grant the shared config page to the
+ * backend, allocate two event channels (one for config round-trips,
+ * one for virtqueue notification), bind handlers, and publish the
+ * whole lot in xenstore for the backend to pick up.  Unwinds fully on
+ * any failure. */
+static int virtio_xenbus_connect_backend(struct xenbus_device *xbdev,
+					 struct virtio_xenbus_device *vx_dev)
+{
+	int ret, evtchn;
+	struct xenbus_transaction xbt;
+
+	ret = gnttab_grant_foreign_access(xbdev->otherend_id,
+					  virt_to_mfn(vx_dev->config_page),
+					  0 /* can write */);
+	if (ret < 0)
+		return ret;
+	vx_dev->gref = ret;
+
+	ret = xenbus_alloc_evtchn(xbdev, &evtchn);
+	if (ret)
+		goto error_grant;
+
+	vx_dev->notify_evtchn = evtchn;
+	/* this evtchn is used to do data transfer */
+	ret = bind_evtchn_to_irqhandler(evtchn, vx_interrupt, /* XXX dummy */
+					0, xbdev->devicetype, vx_dev);
+	if (ret < 0) {
+		xenbus_dev_fatal(xbdev, ret, "bind_evtchn_to_irqhandler");
+		goto error_notify_evtchn;
+	}
+	vx_dev->notify_irq = ret;
+
+	ret = xenbus_alloc_evtchn(xbdev, &evtchn);
+	if (ret)
+		goto error_irqh;
+
+	vx_dev->conf_evtchn = evtchn;
+	/* this evtchn is used to do conf jobs */
+
+	ret = bind_evtchn_to_irqhandler(evtchn, vx_conf_handler, /* XXX dummy */
+					0, xbdev->devicetype, vx_dev);
+	if (ret < 0) {
+		xenbus_dev_fatal(xbdev, ret, "bind_evtchn_to_irqhandler 2");
+		goto error_conf_evtchn;
+	}
+	vx_dev->conf_irq = ret;
+
+again:
+	ret = xenbus_transaction_start(&xbt);
+	if (ret) {
+		xenbus_dev_fatal(xbdev, ret, "starting transaction");
+		goto error_conf_irqh;
+	}
+	/* NOTE(review): the raw MFN is published alongside the grant
+	 * reference -- confirm the backend really needs both. */
+	ret = xenbus_printf(xbt, xbdev->nodename, "page-ref", "%lu",
+			    virt_to_mfn(vx_dev->config_page));
+	if (ret)
+		goto error_xenbus;
+	ret = xenbus_printf(xbt, xbdev->nodename, "page-gref", "%u",
+			    vx_dev->gref);
+	if (ret)
+		goto error_xenbus;
+	ret = xenbus_printf(xbt, xbdev->nodename, "event-channel", "%u",
+			    vx_dev->conf_evtchn);
+	if (ret)
+		goto error_xenbus;
+	ret = xenbus_printf(xbt, xbdev->nodename, "event-channel2", "%u",
+			    vx_dev->notify_evtchn);
+	if (ret)
+		goto error_xenbus;
+	ret = xenbus_transaction_end(xbt, 0);
+	if (ret) {
+		if (ret == -EAGAIN)
+			goto again;
+		xenbus_dev_fatal(xbdev, ret, "completing transaction");
+		goto error_conf_irqh;
+	}
+	xenbus_switch_state(xbdev, XenbusStateInitialised);
+
+	return 0;
+
+error_xenbus:
+	xenbus_transaction_end(xbt, 1);
+	xenbus_dev_fatal(xbdev, ret, "writing xenstore");
+error_conf_irqh:
+	unbind_from_irqhandler(vx_dev->conf_irq, vx_dev);
+	vx_dev->conf_irq = -1;
+error_conf_evtchn:
+	xenbus_free_evtchn(xbdev, vx_dev->conf_evtchn);
+	vx_dev->conf_evtchn = -1;
+error_irqh:
+	unbind_from_irqhandler(vx_dev->notify_irq, vx_dev);
+	vx_dev->notify_irq = -1;
+error_notify_evtchn:
+	xenbus_free_evtchn(xbdev, vx_dev->notify_evtchn);
+	vx_dev->notify_evtchn = -1;
+error_grant:
+	gnttab_end_foreign_access_ref(vx_dev->gref, 0);
+	vx_dev->gref = -1;
+	return ret;
+}
+
+/* Undo virtio_xenbus_connect_backend(): unbind IRQ handlers, revoke
+ * the grant and free the event channels.  Safe to call on a device
+ * that never connected (all fields start out as -1 in probe).
+ *
+ * Fix: the event channels were freed unconditionally; a device that
+ * never connected would ask Xen to free channel -1. */
+static void virtio_xenbus_disconnect_backend(struct virtio_xenbus_device
+					     *vx_dev)
+{
+	if (vx_dev->notify_irq >= 0)
+		unbind_from_irqhandler(vx_dev->notify_irq, vx_dev);
+	vx_dev->notify_irq = -1;
+
+	if (vx_dev->conf_irq >= 0)
+		unbind_from_irqhandler(vx_dev->conf_irq, vx_dev);
+	vx_dev->conf_irq = -1;
+
+	if (vx_dev->gref >= 0)
+		gnttab_end_foreign_access_ref(vx_dev->gref, 0);
+	vx_dev->gref = -1;
+
+	/* evtchn_port_t is unsigned, so compare against the sentinel
+	 * cast rather than testing >= 0. */
+	if (vx_dev->notify_evtchn != (evtchn_port_t)-1)
+		xenbus_free_evtchn(vx_dev->xbdev, vx_dev->notify_evtchn);
+	if (vx_dev->conf_evtchn != (evtchn_port_t)-1)
+		xenbus_free_evtchn(vx_dev->xbdev, vx_dev->conf_evtchn);
+
+	vx_dev->notify_evtchn = -1;
+	vx_dev->conf_evtchn = -1;
+}
+
+/* React to backend state transitions.
+ *
+ * Fix: the return value of register_virtio_device() was assigned to
+ * 'err' but never checked; a registration failure was silently
+ * ignored and the device advertised as Connected anyway. */
+static void virtio_backend_changed(struct xenbus_device *xbdev,
+				   enum xenbus_state backend_state)
+{
+	struct virtio_xenbus_device *vx_dev = dev_get_drvdata(&xbdev->dev);
+	int err;
+
+	dev_dbg(&xbdev->dev, "virtio_xenbus: backend changed to state %d.\n",
+		backend_state);
+
+	switch (backend_state) {
+	case XenbusStateInitialising:
+	case XenbusStateInitialised:
+	case XenbusStateReconfiguring:
+	case XenbusStateReconfigured:
+	case XenbusStateUnknown:
+	case XenbusStateClosed:
+		break;
+
+	case XenbusStateInitWait:
+		xenbus_switch_state(xbdev, XenbusStateConnected);
+		break;
+
+	case XenbusStateConnected:
+		/* We have now established transport layer; register the
+		 * virtio device proper. */
+		err = register_virtio_device(&vx_dev->vdev);
+		if (err) {
+			xenbus_dev_fatal(xbdev, err,
+					 "registering virtio device");
+			break;
+		}
+		xenbus_switch_state(xbdev, XenbusStateConnected);
+		break;
+
+	case XenbusStateClosing:
+		virtio_xenbus_disconnect_backend(vx_dev);
+		unregister_virtio_device(&vx_dev->vdev);
+		break;
+	}
+}
+
+/* Xenbus device types this frontend binds to. */
+static const struct xenbus_device_id virtio_ids[] = {
+	{ "virtio-blk" },
+	{ "virtio-net" },
+	{ "" },
+};
+
+/* Frontend driver registration glue. */
+static struct xenbus_driver virtio_front_driver = {
+	.name   = "virtio-xenbus-front",
+	.owner  = THIS_MODULE,
+	.ids    = virtio_ids,
+	.probe  = virtio_xenbus_probe,
+	.remove = virtio_xenbus_remove,
+	.resume = virtio_xenbus_resume,
+	.otherend_changed = virtio_backend_changed,
+};
+
+
+/* Module init: this frontend only makes sense in a Xen PV guest that
+ * is not dom0 (there is no backend to talk to in dom0). */
+static int __init virtio_xenbus_init(void)
+{
+	if (!xen_pv_domain() || xen_initial_domain())
+		return -ENODEV;
+
+	return xenbus_register_frontend(&virtio_front_driver);
+}
+
+/* Module exit: drop the frontend driver registration. */
+static void __exit virtio_xenbus_exit(void)
+{
+	xenbus_unregister_driver(&virtio_front_driver);
+}
+
+module_init(virtio_xenbus_init);
+module_exit(virtio_xenbus_exit);
diff --git a/include/linux/virtio_xenbus.h b/include/linux/virtio_xenbus.h
new file mode 100644
index 0000000..0cbb3f5
--- /dev/null
+++ b/include/linux/virtio_xenbus.h
@@ -0,0 +1,66 @@
+/*
+ * Virtio Xenbus driver
+ *
+ * This module allows virtio devices to be used over Xenbus.
+ *
+ * Authors:
+ *  Wei Liu <liuw@liuw.name>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef _LINUX_VIRTIO_XENBUS_H
+#define _LINUX_VIRTIO_XENBUS_H
+
+#include <linux/virtio_config.h>
+
+/* Following macros define commands used by front end and back end.
+ * These are also offsets for specific options in config space. */
+
+/* A 32-bit r/o bitmask of the features supported by the host */
+#define VIRTIO_XENBUS_HOST_FEATURES        0
+
+/* A 32-bit r/w bitmask of features activated by the guest */
+#define VIRTIO_XENBUS_GUEST_FEATURES       4
+
+/* A 32-bit r/w PFN for the currently selected queue */
+#define VIRTIO_XENBUS_QUEUE_PFN            8
+
+/* A 16-bit r/o queue size for the currently selected queue */
+#define VIRTIO_XENBUS_QUEUE_NUM            12
+
+/* A 16-bit r/w queue selector */
+#define VIRTIO_XENBUS_QUEUE_SEL            14
+
+/* A 16-bit r/w queue notifier */
+#define VIRTIO_XENBUS_QUEUE_NOTIFY         16
+
+/* An 8-bit device status register.  */
+#define VIRTIO_XENBUS_STATUS               18
+
+/* An 8-bit r/o interrupt status register.  Reading the value will return the
+ * current contents of the ISR and will also clear it.  This is effectively
+ * a read-and-acknowledge. */
+#define VIRTIO_XENBUS_ISR                  19
+
+/* The bit of the ISR which indicates a device configuration change. */
+#define VIRTIO_XENBUS_ISR_CONFIG           0x2
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space.  (The dev argument is currently unused; it is
+ * kept for parity with the virtio-pci equivalent.) */
+#define VIRTIO_XENBUS_CONFIG(dev)          20
+
+/* Virtio Xenbus ABI version, this must match exactly */
+#define VIRTIO_XENBUS_ABI_VERSION          0
+
+/* How many bits to shift physical queue address written to QUEUE_PFN.
+ * 12 is historical, and due to x86 page size.  Both ends must use this
+ * constant (not PAGE_SHIFT) when encoding/decoding QUEUE_PFN. */
+#define VIRTIO_XENBUS_QUEUE_ADDR_SHIFT     12
+
+/* The alignment to use between consumer and producer parts of vring.
+ * x86 pagesize. */
+#define VIRTIO_XENBUS_VRING_ALIGN          4096
+
+#endif /* _LINUX_VIRTIO_XENBUS_H */
