/**************************************************************************\
*//*! \file ef_vnic.c Xen front end driver

Copyright 2006 Solarflare Communications Inc,
               9501 Jeronimo Road, Suite 250,
               Irvine, CA 92618, USA

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License version 2 as published by the Free
Software Foundation, incorporated herein by reference.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*//*
\**************************************************************************/
#include <linux/module.h>
#include <linux/workqueue.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <xen/xenbus.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>

#include "etherfabric/tools.h"

#include <ci/driver/virtual/vnic.h>
#include "ci/xen/ef_msg_iface.h"
#include "ci/xen/ef_xen_util.h"
#include "ef_vnic_bufs.h"
#include "ef_vnic_netdev.h"

#include "ci/xen/ef_cuckoo_hash.h"
#include "ci/tools/profile.h"

#if CI_CFG_PROF_BUFFER

/* Read cursor into the profile buffer; set by ef_profile_init() and advanced
 * by profile_read(). */
static ci_prof_index_t ef_profile_read_current = 0;
/* Profiling sample buffer; vmalloc'ed in ef_profile_init() and released in
 * ef_profile_fini(). */
ci_prof_buffer_t*    ci_profile_buffer;
/* Size in bytes of ci_profile_buffer. */
unsigned             ci_profile_buffer_size;

/* Allocate and zero the profiling buffer, hand it to the profiler, and start
 * the sysfs read cursor at the profiler's current index.
 * Returns 0 on success or -ENOMEM if the buffer cannot be allocated. */
static int ef_profile_init(void)
{
  enum { PROFILE_BUFFER_BYTES = 32768 };

  ci_profile_buffer = vmalloc(PROFILE_BUFFER_BYTES);
  if (!ci_profile_buffer)
    return -ENOMEM;

  memset(ci_profile_buffer, 0, PROFILE_BUFFER_BYTES);
  ci_profile_buffer_size = PROFILE_BUFFER_BYTES;

  ci_profiler_init(ci_profile_buffer, ci_profile_buffer_size);
  ef_profile_read_current = ci_profiler_index_current(ci_profile_buffer);

  prof_calibrate();
  return 0;
}

/* Release the profiling buffer, if one was allocated.  Safe to call more
 * than once because the pointer is cleared afterwards. */
static void ef_profile_fini(void)
{
  if (ci_profile_buffer == NULL)
    return;

  vfree(ci_profile_buffer);
  ci_profile_buffer = NULL;
}

/* sysfs binary-attribute read handler for the "profile" file.
 *
 * Formats profile records as text lines and copies as many bytes as fit
 * into buf (at most len).  A partially consumed line is kept in static
 * storage so the next call continues where this one stopped; kobj and off
 * are not used.
 *
 * Returns the number of bytes copied.  That is fewer than len (possibly 0)
 * when ci_ustream_readq() yields no further record.
 *
 * NOTE(review): the static line state is shared by all callers and is not
 * protected by any lock in this function - confirm readers are serialised. */
static ssize_t profile_read(struct kobject *kobj, char *buf, loff_t off, size_t len)
{
  /* Formatted line carried across calls, with its length and the number of
   * bytes already handed out. */
  static char line_buffer[80];
  static int  line_length = 0;
  static int  line_used = 0;
  ssize_t res = 0;
  while(1) {
    /* First drain whatever is left of the current formatted line. */
    if(line_used < line_length) {
      size_t n = CI_MIN(len, line_length - line_used);
      memcpy(buf, line_buffer+line_used, n);
      line_used+=n;
      buf+=n;
      len-=n;
      res+=n;
      if(len == 0)
        return res;
    };
    ci_assert_equal(line_used, line_length);
    line_length = 0;
    line_used = 0;
    {
      /* NB: this 'len' (record length in stream elements) shadows the
       * function parameter of the same name within this scope. */
      ci_uint32 len;
      ci_prof_index_t sample_index;
      ci_prof_record_t *ptr;
      ci_uint8 sample_kind;
      size_t ptrsize;
      /* Private copy of the record, so formatting does not read the shared
       * buffer after the freshness check below. */
      char prof_copy[32];

      while(1) {
        sample_index = ef_profile_read_current;
        ci_ustream_readq(ci_profile_buffer, &ef_profile_read_current, &ptr, &len);
        if(ptr == NULL) {
          /* No record available: return what has been copied so far. */
          return res;
        }
        sample_kind = ci_ustream_hdr_type(ptr);
        ptrsize = len * sizeof(ci_ustream_element_t); /* bytes */
        ptrsize = CI_MIN(ptrsize, sizeof(prof_copy));
        memcpy(&prof_copy, ptr, ptrsize);
        /* Accept the copy only if the reader position is still fresh;
         * otherwise restart from the oldest index the profiler reports.
         * (Presumably "not fresh" means the record was overwritten while
         * being copied - confirm against ci_ustream_reader_fresh().) */
        if(ci_ustream_reader_fresh(ci_profile_buffer, sample_index))
          break;
        VNIC_VERB(ci_log("Profile resync"));
        ef_profile_read_current = ci_profiler_index_oldest(ci_profile_buffer);
      };
      ptr = (ci_prof_record_t*)prof_copy;
      /* One text line per record: index, line number, timestamp, sample
       * type, length and first data word. */
      line_length = snprintf(line_buffer,
                             80,
                             "%08x %04x %012llx %02x %08x %08x\n",
                             sample_index,
                             (unsigned)ci_profrec_frc_lineno(ptr->hdr.frc),
                             ci_profrec_frc_time(ptr->hdr.frc),
                             CI_PROFSAMPLE_TYPE(sample_kind),
                             len,
                             ptr->data[0]);
      if(line_length <= 0)
        return res;
      /* Clamp the reported length to the size of line_buffer. */
      line_length = CI_MIN(line_length, 80);
    }
  }
}

/* sysfs write handler for the "profile" file: the profile is read-only, so
 * every write is rejected with -EINVAL. */
static ssize_t profile_write(struct kobject *kobj, char *buf, loff_t off, size_t len)
{
  const ssize_t rejected = -EINVAL;

  return rejected;
}

/* Binary sysfs attribute "profile", created on the module's kobject in
 * init_etherfabric_vnic().  Reads are served by profile_read(); writes are
 * rejected by profile_write(). */
static struct bin_attribute profile_attr = {
  .attr = {
    .name = "profile",
    /* World-readable, flagged as a FIFO. */
    .mode = 0444 | S_IFIFO,
    .owner = THIS_MODULE
  },
  .size = 0,
  .read = &profile_read,
  .write = &profile_write,
};
#endif

/* We talk version 0.010 of the interdomain protocol.  This value is placed
 * in the version field of our reply to the backend's HELLO message. */
#define EF_VPROTO_VERSION (0x00001000)

/* The number of pages we will use for the rx slow path FIFO. In
 * principle we can make this quite large, since we don't need the
 * pages to be physically contiguous and we request them one at a
 * time. In practice we are limited by the number of slots in the
 * message definition. We could be clever if we wanted more. */
#define VNIC_RX_PAGES (EF_MSG_MAX_PAGE_REQ)

/* As for VNIC_RX_PAGES, but this time for the Tx slow path FIFO. */
#define VNIC_TX_PAGES (EF_MSG_MAX_PAGE_REQ)

/* Module entry points, defined further down in this file. */
static int init_etherfabric_vnic(void);
static void cleanup_etherfabric_vnic(void);

/* Register module init/exit callbacks */
module_init(init_etherfabric_vnic);
module_exit(cleanup_etherfabric_vnic);
MODULE_LICENSE("GPL");

/* xenbus driver callbacks, defined later in this file and installed in the
 * xenbus_driver structure below. */
static int vnic_probe(struct xenbus_device *dev,
		      const struct xenbus_device_id *id);
static int vnic_resume(struct xenbus_device *dev);
static int vnic_remove(struct xenbus_device *dev);
static void vnic_bend_changed(struct xenbus_device *dev, XenbusState vnic_state);

/* Handler for the network event channel; bound in vnic_state_change(). */
static irqreturn_t pkt_from_bend(int irq, void *context, struct pt_regs *unused);

/* Device types this frontend binds to; the list ends with an empty name. */
static struct xenbus_device_id vnic_ids[] = {
        { "ef1" }, /* This must match the backend */
        { "" }
};


/* Tell xenbus how to call us for particular actions.  Registered with
 * xenbus_register_frontend() at module init and unregistered at exit. */
static struct xenbus_driver vnic = {
        .name = "ef1",
        .owner = THIS_MODULE,
        .ids = vnic_ids,
        .probe = vnic_probe,
        .remove = vnic_remove,
        .resume = vnic_resume,
        /* Invoked with the backend's new xenbus state. */
        .otherend_changed = vnic_bend_changed,
};

/* Module init: optionally set up the profiling buffer and its sysfs file,
 * then register the frontend driver with xenbus.  Anything already set up
 * is torn down again, in reverse order, if a later step fails.
 * Returns 0 on success or the error code of the failing step. */
int init_etherfabric_vnic(void)
{
  int rc;

  ci_set_log_prefix("VNIC:" );
  ci_log("so far so good...");

#if CI_CFG_PROF_BUFFER
  rc = ef_profile_init();
  if (rc)
    goto fail_profile;

  rc = sysfs_create_bin_file(&THIS_MODULE->mkobj.kobj, &profile_attr);
  if (rc)
    goto fail_sysfs;
#endif

  rc = xenbus_register_frontend(&vnic);
  ci_log("xenbus_register_frontend returned %d", rc);
  if (!rc)
    return 0;

  /* Registration failed: fall through the unwinding steps below. */
#if CI_CFG_PROF_BUFFER
  sysfs_remove_bin_file(&THIS_MODULE->mkobj.kobj, &profile_attr);
fail_sysfs:
  ef_profile_fini();
fail_profile:
#endif
  ci_log("Module load error %d", rc);
  return rc;
}

/* Module exit: unregister the xenbus driver first, then remove the
 * profiling sysfs file and buffer when profiling is compiled in. */
void cleanup_etherfabric_vnic(void)
{
  xenbus_unregister_driver(&vnic);

#if CI_CFG_PROF_BUFFER
  sysfs_remove_bin_file(&THIS_MODULE->mkobj.kobj, &profile_attr);
  ef_profile_fini();
#endif

  ci_log("All done.");
}

/* Carry out a state change (we can't do this from IRQ as it may sleep).
 *
 * Work-queue handler installed by INIT_WORK() in vnic_setup_shared_state();
 * context is the xenbus device.  Switches the frontend to the state last
 * requested through vnic_schedule_state().  On entering
 * XenbusStateConnected it also creates the network device and binds the
 * network event channel to pkt_from_bend().  Failures are reported with
 * xenbus_dev_fatal(). */
static void vnic_state_change(void *context)
{
  struct xenbus_device *dev = context;
  ef_vnic_bec *bec = (ef_vnic_bec *)dev->dev.driver_data;
  /* Read the requested state once so that the state we switch to and the
   * state we act on cannot differ if another request arrives meanwhile. */
  int state = bec->next_state;
  int err;

  xenbus_switch_state(dev, state);
  if (state != XenbusStateConnected)
    return;

  /* We've done the handshake and been given a MAC. Time for a network
   * device. */
  if (!ef_vnic_osd_probe(EF_VNIC_OSD_FROM_BEC(bec))) {
    /* Life is really pretty pointless if we couldn't create a netdev.
     * Report a negative errno, as the other xenbus_dev_fatal() callers in
     * this file do. */
    xenbus_dev_fatal(dev, -EIO, "creating network device");
    return;
  }

  /* Install the handler for network events */
  err = bind_evtchn_to_irqhandler(bec->net_channel, pkt_from_bend,
                                  SA_SAMPLE_RANDOM, "vnicnet", dev);
  if (err < 0)
    xenbus_dev_fatal(dev, err, "installing network IRQ");
}

/* Record the xenbus state we want to move to and queue the work item that
 * performs the switch.  Used from interrupt context, where the switch
 * itself cannot be done because it may sleep. */
static inline void vnic_schedule_state(ef_vnic_bec *bec, int state)
{
  bec->next_state = state;

  schedule_work(&bec->state_change);
}

/* ioremap the pages that we're passed by the backend driver, and set
   up state to point to relevant portions of those pages */
static int vnic_map_hardware(struct xenbus_device *dev, struct ef_msg_hw *hw_msg)
{
  ef_vnic_bec *bec= (ef_vnic_bec *)dev->dev.driver_data;

  return ef_vnic_svh_ctor(EF_VNIC_SVH_FROM_BEC(bec), hw_msg);
}


/* Map each buffer page granted by the backend into our address space,
 * storing the resulting pointer back into the message.
 * Returns 0 if every page mapped, otherwise the error of the last page
 * that failed (the failing entry keeps its error-encoded pointer). */
static int vnic_convert_bufs(struct xenbus_device *dev, struct ef_msg *msg)
{
  int status = 0;
  int idx;

  for (idx = 0; idx < msg->u.bufs.pages; ++idx) {
    unsigned grant = msg->u.bufs.reqs[idx].grant;

    /* FIXME: This can block. */
    msg->u.bufs.reqs[idx].ptr = xenbus_map_ring_valloc(dev, grant);
    if (IS_ERR(msg->u.bufs.reqs[idx].ptr))
      status = PTR_ERR(msg->u.bufs.reqs[idx].ptr);

    VNIC_VERB(ci_log("attempt to map buffer page (grant %d) returned %d (pointer %p)",
                     grant, status, msg->u.bufs.reqs[idx].ptr));
    VNIC_VERB(ci_log("page %p has buffer id %lx", msg->u.bufs.reqs[idx].ptr, 
                     msg->u.bufs.buf + idx*PAGE_SIZE));
  }

  return status;
}


/* Fill in the local pointer for every page named in a MAPBUF reply, taking
 * it from our own page list starting at the request id carried in the
 * message.  Always returns 0. */
static int vnic_convert_mapped_bufs(ef_vnic_svh *svh, 
                                    struct ef_msg *msg)
{
  struct ef_vnic_bufinfo *info = svh->bufs;
  int offset = msg->u.mapbufs.reqid;
  int i = 0;

  while (i < msg->u.mapbufs.pages) {
    ci_assert_lt(offset + i, EF_BUF_MAX_PAGES);
    msg->u.mapbufs.reqs[i].ptr = info->page_list[offset + i];
    ++i;
  }

  return 0;
}


/* Send as many outstanding buffer-map requests to the backend as the
 * message queue will take.  Each message asks for a power-of-two number of
 * pages, capped at EF_MSG_MAX_PAGE_REQ.
 *
 * Returns 0 when nothing is (or remains) outstanding, -ENOSPC when the
 * queue filled up, or the error from ef_vnic_buf_map_request().  In the
 * error cases svh->buffer_requests_left records what is still to send. */
int vnic_send_buffer_requests(ef_vnic_bec *bec, ef_vnic_svh *svh)
{
  struct ef_msg *msg;
  unsigned flags;
  int offset, pages;
  int rc;

  if (svh->buffer_requests_left == 0) {
    VNIC_VERB(ci_log("%s: All done, no need to call again thank you",
                     __FUNCTION__));
    return 0;
  }

  for (;;) {
    /* Requests are issued in order, so the next page index is however many
       have been requested already. */
    offset = EF_BUF_MAX_PAGES - svh->buffer_requests_left;
    pages = ci_pow2(ci_log2_le(svh->buffer_requests_left));
    pages = CI_MIN(pages, EF_MSG_MAX_PAGE_REQ);

    ci_assert_ge(offset, 0);
    ci_assert_gt(pages, 0);

    msg = ef_bec_start_msg(bec, &flags);
    if (msg == NULL) {
      /* even though it returned an error, we still have to abort the
         started message */
      ef_bec_abort_msg(bec, &flags);
      rc = -ENOSPC;
      ci_log("%s: queue full, stopping for now", __FUNCTION__);
      break;
    }

    rc = ef_vnic_buf_map_request(bec->dev, svh->bufs, msg, pages, offset);
    if (rc != 0) {
      ci_log("%s: problem with grant, stopping for now", __FUNCTION__);
      ef_bec_abort_msg(bec, &flags);
      break;
    }
    ef_bec_complete_msg(bec, &flags);

    svh->buffer_requests_left -= pages;
    if (svh->buffer_requests_left == 0)
      break;
  }

  return rc;
}


/* Process a packet received from the back end driver */
irqreturn_t pkt_from_bend(int irq, void *context, struct pt_regs *unused)
{
  struct xenbus_device *dev = context;
  ef_vnic_bec *bec = (ef_vnic_bec *)dev->dev.driver_data;
  ef_vnic_osd_interrupt(EF_VNIC_OSD_FROM_BEC(bec));

  return IRQ_HANDLED;
}

/* Build an RXMEM or TXMEM message granting the backend access to the pages
 * of one slow-path FIFO.
 *
 * dev        xenbus device the grants are made for
 * bec        back end connection (currently unused here)
 * msg        message to fill in
 * type       EF_MSG_RXMEM or EF_MSG_TXMEM
 * pages      number of distinct pages in page_list
 * page_list  the FIFO's pages
 *
 * Besides one grant per page, a second grant for the first page is stored
 * at index 'pages', forming a partial virtual ring buffer.  The grants
 * array therefore holds one entry more than msg->u.pktmem.pages says.
 *
 * Returns 0 on success, -EINVAL if there are no pages to grant, or -EIO as
 * soon as a grant fails.
 * TODO: grants already made are not revoked on failure. */
static int make_pktmem_msg(struct xenbus_device *dev, ef_vnic_bec *bec,
                           struct ef_msg *msg, int type, int pages, 
                           struct page **page_list)
{
  int i, pfn, gnt;

  ci_assert(type == EF_MSG_RXMEM || type == EF_MSG_TXMEM);
  ef_msg_init(msg, type);

  /* Without at least one page there is nothing to grant, and the
     wrap-around grant below would read a non-existent first page. */
  if (page_list == NULL || pages <= 0) {
    msg->u.pktmem.pages = 0;
    return -EINVAL;
  }

  /* Tell the other side how many pages there are. */
  msg->u.pktmem.pages = pages;

  /* Index 'pages' is the extra grant of page 0 that makes the ring. */
  for (i = 0; i <= pages; i++) {
    pfn = page_to_pfn(page_list[i < pages ? i : 0]);
    /* Check the signed return value, not the message field, whose type may
       not be able to represent a negative error. */
    gnt = xenbus_grant_ring(dev, pfn_to_mfn(pfn));
    msg->u.pktmem.grants[i] = gnt;
    if (gnt < 0) {
      if (i < pages)
        ci_log("Failed to grant receive packet window: %d\n", gnt);
      else
        ci_log("Failed to grant receive packet window for VRB: %d\n", gnt);
      /* Stop at the first failure rather than granting further pages. */
      return -EIO;
    }
  }

  return 0;
}


/* Fill msg with the page grants for our slow-path receive FIFO.
 * Returns the result of make_pktmem_msg(). */
inline int make_rxmem_msg(struct xenbus_device *dev, ef_vnic_bec *bec, 
                          struct ef_msg *msg)
{
  struct page **rx_pages = bec->rcv_page_list;

  return make_pktmem_msg(dev, bec, msg, EF_MSG_RXMEM, bec->rcv_pages,
                         rx_pages);
}


/* Fill msg with the page grants for our slow-path send FIFO.
 * Returns the result of make_pktmem_msg(). */
inline int make_txmem_msg(struct xenbus_device *dev, ef_vnic_bec *bec, 
                          struct ef_msg *msg)
{
  struct page **tx_pages = bec->snd_page_list;

  return make_pktmem_msg(dev, bec, msg, EF_MSG_TXMEM, bec->snd_pages,
                         tx_pages);
}


/* Reply to dom0's "my queue is full": set the QUEUE0NOTFULL flag in the
   shared page and signal dom0, unless the flag was already set, in which
   case no interrupt is sent. */
inline void vnic_set_queue_not_full(ef_vnic_bec *bec)
{
  if (ci_bit_test_and_set(&bec->shared_page->aflags, MSG_AFLAGS_QUEUE0NOTFULL_B)) {
    ci_log("queue not full bit already set, not signalling");
    return;
  }

  ef_hyperop_remote_irq(bec->channel);
}

/* Tell dom0 that the queue we want to use is full: set the QUEUEUFULL flag
   in the shared page and signal dom0, unless the flag was already set.
   dom0 should respond by setting MSG_AFLAGS_QUEUEUNOTFULL in due course. */
inline void vnic_set_queue_full(ef_vnic_bec *bec)
{
  if (ci_bit_test_and_set(&bec->shared_page->aflags, MSG_AFLAGS_QUEUEUFULL_B)) {
    ci_log("queue full bit already set, not signalling");
    return;
  }

  ef_hyperop_remote_irq(bec->channel);
}


/* Decide how many reply slots handling msg will need and, if any, lock the
   reply queue and check that the slots are free.

   Returns the slot count (> 0) with the queue LOCKED, 0 with the queue
   untouched when no reply is needed, or -ENOSPC with the queue unlocked
   when there is not enough room. */
static 
int vnic_lock_msg_replies(struct ef_msg *msg, sh_msg_fifo2 *queue, 
                          unsigned *flags)
{
  int reply_slots = 0;

  switch (msg->id) {
  case EF_MSG_HELLO:
    reply_slots = 1;
    break;
  case EF_MSG_SETHW:
    reply_slots = 2;
    break;
  case EF_MSG_LOCALMAC:
    break;
  default:
    /* Replies need no reply; for anything else we don't recognise,
       assume zero for now. */
    if (!(msg->id & EF_MSG_REPLY))
      ci_log("%s: Unknown number of reply slots required", __FUNCTION__);
    break;
  }

  if (reply_slots == 0)
    return 0;

  ef_msg_lock_queue(queue, flags);
  if (ef_msg_check_space(queue, reply_slots))
    return reply_slots;

  ef_msg_unlock_queue(queue, flags);
  ci_log("%s: no space for %d slots", __FUNCTION__, reply_slots);
  return -ENOSPC;
}



/* Process an IRQ received from the back end driver on the control channel.
 *
 * First services the asynchronous flags in the shared page (MTU change,
 * link state, queue-not-full, queue-full), then drains the from_dom0
 * message queue.  Each message is peeked, handled, and only then removed.
 * If a message needs reply slots that are not available, processing stops,
 * the message stays queued, and dom0 is told our queue is full.
 *
 * NOTE(review): sent_pkts is a function-level static, so the slow-path FIFO
 * grants are sent once per module rather than once per device - confirm
 * this is intended when several devices exist. */
static irqreturn_t irq_from_bend(int irq, void *context, struct pt_regs *unused)
{
  struct xenbus_device *dev = context;
  ef_vnic_bec *bec = (ef_vnic_bec *)dev->dev.driver_data;
  /* FIXME: Private. */
  ef_vnic_svh *svh = EF_VNIC_SVH_FROM_BEC(bec);
  struct ef_msg msg;
  int err, reply_slots, cookie, queue_was_full = 0;
  unsigned flags1;
  /* spin_lock_irqsave() requires an unsigned long for the saved flags. */
  unsigned long flags2;
  static int sent_pkts = 0;
  ci_log("irq %d from device %s", irq, dev->nodename);

  while((bec->shared_page->aflags & MSG_AFLAGS_TO_DOMU_MASK) != 0){
    VNIC_VERB(ci_log("aflags is %08x", bec->shared_page->aflags));

    /* Some of the flags have been set */
    /* In future, may want to take advantage of the slowpath/fastpath
       mask split of aflags to decide which to do immediately inline,
       which to fall through to slow path */
    if(bec->shared_page->aflags & MSG_AFLAGS_MTUCHANGE){
      ci_bit_clear(&bec->shared_page->aflags, MSG_AFLAGS_MTUCHANGE_B);
      ci_log("got mtu change %d", bec->shared_page->mtu);
    }
    
    if(bec->shared_page->aflags & MSG_AFLAGS_LINKSTATE){
      ci_bit_clear(&bec->shared_page->aflags, MSG_AFLAGS_LINKSTATE_B);
      ci_log("got link state %d", bec->shared_page->link_state);
    }

    if(bec->shared_page->aflags & MSG_AFLAGS_QUEUEUNOTFULL){
      /* We've been told there may now be space. */
      ci_bit_clear(&bec->shared_page->aflags, MSG_AFLAGS_QUEUEUNOTFULL_B);

      if(svh->buffer_requests_left){
        /* Send any pending buffer map request messages that we can */
        if(vnic_send_buffer_requests(bec, svh) == -ENOSPC)
          vnic_set_queue_full(bec);
      }
    }

    if(bec->shared_page->aflags & MSG_AFLAGS_QUEUE0FULL){
      /* There will be space at the end of this function if we can
         make any.  TODO make this more intelligent, so that separate
         interrupts aren't used for sending replies (if present) and
         this message */
      ci_bit_clear(&bec->shared_page->aflags, MSG_AFLAGS_QUEUE0FULL_B);
      queue_was_full = 1;
    }
  }
  /* Pull msg out of shared memory */

  /* NB. There is a potential race here whereby someone else could
     pull our message from the queue (as we only peek at it) before
     we've finished using it. Currently not a problem as we only read
     messages out here. */
  while((err = ef_msg_peek(bec->shared_page, &bec->from_dom0, &msg, &cookie))
        == 0){
    /* A positive result means the to_dom0 queue is now locked (flags1) and
       the case below is responsible for unlocking it. */
    if((reply_slots = vnic_lock_msg_replies(&msg, &bec->to_dom0, &flags1)) 
       >= 0) {
      switch(msg.id) {
      case EF_MSG_HELLO:
        /* Hello, reply with Reply */
        ci_log("got Hello, with version %.8x", msg.u.hello.version);
        msg.id = EF_MSG_HELLO | EF_MSG_REPLY;
        msg.u.hello.version = EF_VPROTO_VERSION;
        /* Note the MAC we were given. */
        memcpy(svh->ep_state.mac, msg.u.hello.mac, ETH_ALEN);
        /* Send reply */
        ef_msg_send_notify_locked(bec->shared_page, bec->channel,
                                  &bec->to_dom0, &msg);
        ef_msg_unlock_queue(&bec->to_dom0, &flags1);
        break;
      case EF_MSG_SETHW:
        /* Hardware info message */
        ci_log("got H/W info");
        /* This may send one or more SVH-specific messages to the bend */
        if (vnic_map_hardware(dev, &msg.u.hw) < 0 )  {
          /* Error - Change state to closing */
          vnic_schedule_state(bec, XenbusStateClosing);
          ef_msg_unlock_queue(&bec->to_dom0, &flags1);
        } else {
          /* Time to set up the slowpath FIFOs */
          if (!sent_pkts) {
            sent_pkts = 1;
            /* Construct rx mem reply */
            make_rxmem_msg(dev, bec, &msg);
            /* Send rx mem reply - no notify as we're about to send another one */
            ef_msg_send_locked(bec->shared_page, &bec->to_dom0, &msg);
            /* Construct tx mem reply */
            make_txmem_msg(dev, bec, &msg);
            /* Send tx mem reply */
            ef_msg_send_notify_locked(bec->shared_page, bec->channel, 
                                      &bec->to_dom0, &msg);
          }
          ef_msg_unlock_queue(&bec->to_dom0, &flags1);
          /* Send buffer request messages if room */
          if(vnic_send_buffer_requests(bec, svh) == -ENOSPC)
            vnic_set_queue_full(bec);
          /* Change state to connected */
          vnic_schedule_state(bec, XenbusStateConnected);
        }
        break;
      case EF_MSG_GETBUF | EF_MSG_REPLY:
        /* Received reply to GETBUF request */
        VNIC_VERB(ci_log("got some buffers back."));
        /* Remap the buffers received */
        vnic_convert_bufs(dev, &msg);
        /* Queue up some Rx buffers to start things off. */
        ef_vnic_svh_add_bufs(svh, &msg);
        break;
      case EF_MSG_GETBUF | EF_MSG_REPLY | EF_MSG_ERROR:
        /* No buffers.  Can't use the fast path. */
        /* FIXME: Graceful... */
        ci_fail(("Oh dear.  I couldn't get any buffers."));
        break;
      case EF_MSG_MAPBUF | EF_MSG_REPLY:
        ci_log("Got mapped buffers back");
        vnic_convert_mapped_bufs(svh, &msg);
        /* Queue up some Rx buffers to start things off. */
        ef_vnic_svh_add_bufs(svh, &msg);
        break;
      case EF_MSG_MAPBUF | EF_MSG_REPLY | EF_MSG_ERROR:
        /* No buffers.  Can't use the fast path. */
        /* FIXME: Graceful... */
        ci_fail(("Got mapped buffers error"));
        break;
      case EF_MSG_LOCALMAC:
        /* Should be either add or remove */
        ci_assert((msg.u.localmac.flags & EF_MSG_LOCALMAC_ADD) ^ 
                  (msg.u.localmac.flags & EF_MSG_LOCALMAC_REMOVE));
        if(msg.u.localmac.flags & EF_MSG_LOCALMAC_ADD){
          ci_log("New local mac address to filter to slow path");
          spin_lock_irqsave(&svh->local_macs_lock, flags2);
          if(!ef_cuckoo_hash_add(&svh->local_macs_table, msg.u.localmac.mac, 0, 1))
            ci_log("Failed to add to hash table");
          spin_unlock_irqrestore(&svh->local_macs_lock, flags2);
        }
        if(msg.u.localmac.flags & EF_MSG_LOCALMAC_REMOVE){
          ci_log("Remove local mac address from filter to slow path");
          spin_lock_irqsave(&svh->local_macs_lock, flags2);
          if(ef_cuckoo_hash_remove(&svh->local_macs_table, msg.u.localmac.mac))
            ci_log("Failed to remove from hash table");
          spin_unlock_irqrestore(&svh->local_macs_lock, flags2);
        }
        /* A handled message must not fall into the "unknown message"
           case below. */
        break;
      default:
        ci_log("Huh? Message code is %x", msg.id);
        break;
      }
      
      /* Finally, remove the message from the from_dom0 queue. */
      err = ef_msg_recv_next(bec->shared_page, &bec->from_dom0, cookie);
      /* Can assert this as it should be the same error we got from
         peek */
      ci_assert_equal(err, 0);
    }
    else{
      ci_log("%s: no space for reply, not processing incoming message",
             __FUNCTION__);
      /* Couldn't get the reply slots needed */
      /* Don't process the incoming message until possible to reply */
      vnic_set_queue_full(bec);
      goto giveup;
    }
  }
 giveup:
  /* We will now have made space if we can */
  if(queue_was_full)
    vnic_set_queue_not_full(bec);

  /* ENOENT is to be expected if we've used the IRQ to signal a change in
     aflags; err is 0 when we gave up early for lack of reply space, which
     has already been logged. */
  if (err != 0 && err != -ENOENT)
    ci_log("ef_msg_recv returned %d", err);
  return IRQ_HANDLED;
}


/* Create a byte-FIFO memory window.
 *
 * Allocates up to 'target' individual pages and maps them into one
 * virtually contiguous region, with the first page mapped again after the
 * last so that the region forms a partial virtual ring buffer.
 *
 * page_list  out: array of the allocated pages plus the trailing duplicate
 *            of the first page; NULL on failure
 * pages      out: number of distinct pages obtained (duplicate not
 *            counted); 0 on failure
 * target     number of pages wanted; fewer may be obtained
 *
 * Returns the mapped address, or NULL if the page list, every page, or the
 * mapping could not be allocated.  Nothing is left allocated on failure. */
static void *alloc_byte_fifo_mem(struct page ***page_list, int *pages, 
                                 int target)
{
  struct page **list;
  void *ret = NULL;
  int n;

  *pages = 0;

  /* +1 to target so we have space for the first page twice - it goes
     at the end to make the partial ring buffer */
  list = kzalloc((target + 1) * sizeof(struct page *), GFP_ATOMIC);
  *page_list = list;
  if (list == NULL)
    return NULL;

  for (n = 0; n < target; n++) {
    void *tmp = (void*)__get_free_page(GFP_ATOMIC);
    if (tmp == NULL)
      break; /* Well, we got what we could. */

    list[n] = virt_to_page(tmp);
  }

  if (n > 0) {
    /* Add another copy of the first page to the end of the list to make
       the virtual ring buffer, and include it in the mapping. */
    list[n] = list[0];
    ret = vmap(list, n + 1, VM_MAP, PAGE_KERNEL);
  }

  if (ret == NULL) {
    /* Either no page could be had (so there is no first page to duplicate
       or map) or the mapping failed: give everything back. */
    ci_log("Failed to allocate byte fifo memory (%d pages obtained)", n);
    while (n-- > 0)
      __free_page(list[n]);
    kfree(list);
    *page_list = NULL;
    return NULL;
  }

  /* Don't count the copy of the first page in this count */
  *pages = n;

  ci_log("Allocated %d pages for byte fifo memory, mapped at %p", n, ret);

  return ret;
}


/* Allocate and map the slow-path receive FIFO (VNIC_RX_PAGES pages wanted),
 * recording its page list and page count in bec.
 * Returns the mapped address, as returned by alloc_byte_fifo_mem(). */
inline void *alloc_rcv_mem(ef_vnic_bec *bec)
{
  void *window = alloc_byte_fifo_mem(&bec->rcv_page_list, &bec->rcv_pages, 
                                     VNIC_RX_PAGES);

  return window;
}

/* Allocate and map the slow-path send FIFO (VNIC_TX_PAGES pages wanted),
 * recording its page list and page count in bec.
 * Returns the mapped address, as returned by alloc_byte_fifo_mem(). */
inline void *alloc_snd_mem(ef_vnic_bec *bec)
{
  void *window = alloc_byte_fifo_mem(&bec->snd_page_list, &bec->snd_pages,
                                     VNIC_TX_PAGES);

  return window;
}


/* Grant access to some pages? */
static int make_named_grant(struct xenbus_device *dev, void *page, 
                            const char *name)
{
  struct xenbus_transaction tr;
  int err, gnt;
  gnt = xenbus_grant_ring(dev, virt_to_mfn(page));
  if (gnt < 0)
    return gnt;
  err = xenbus_transaction_start(&tr);
  if (err == 0)
    err = xenbus_printf(tr, dev->nodename, name, "%d", gnt);
  xenbus_transaction_end(tr, err != 0 );
  return err;
}


/* Undo alloc_byte_fifo_mem(): unmap the window, free the pages, free the
 * list.  Tolerates a NULL window and/or a NULL page list. */
static void free_byte_fifo_mem(void *fifo, struct page **page_list, int pages)
{
  int i;

  if (fifo != NULL)
    vunmap(fifo);
  if (page_list == NULL)
    return;
  for (i = 0; i < pages; i++)
    __free_page(page_list[i]);
  kfree(page_list);
}


/* Setup the shared state.
 *
 * Allocates the per-device state and attaches it to dev, allocates the
 * control and network event channels, allocates and grants the two-page
 * shared area (control page + message page), initialises the two message
 * queues in the message page, publishes the event channels in xenstore,
 * allocates the slow-path rx/tx FIFOs and finally binds the control event
 * channel to irq_from_bend().
 *
 * Returns 0 on success or a negative errno.  On failure the error has been
 * reported with xenbus_dev_fatal() and dev->dev.driver_data is NULL.
 *
 * TODO: on failure the event channels and, once granted, the shared pages
 * are not released here; pages the backend may have mapped must not be
 * freed without first ending the grants. */
static int vnic_setup_shared_state(struct xenbus_device *dev)
{
  struct xenbus_transaction tr;
  int err, msgs_per_queue;
  ef_vnic_bec *bec;
  ef_vnic_all *vnic_all;

  ci_log("Setting up shared state.");

  /* Alloc mem for state */
  vnic_all = kzalloc(sizeof(ef_vnic_all), GFP_KERNEL);
  if (vnic_all == NULL) {
    xenbus_dev_fatal(dev, -ENOMEM, "allocating vnic data");
    return -ENOMEM;
  }

  bec = &vnic_all->bec;

  /* Store so state can be retrieved from device */
  dev->dev.driver_data = bec;
  bec->dev = dev;
  /* Add state change hook */
  INIT_WORK(&bec->state_change, vnic_state_change, dev);

  /* Create xenbus event channel */
  err = xenbus_alloc_evtchn(dev, &bec->channel);
  if (err) {
    xenbus_dev_fatal(dev, err, "allocating event channel");
    goto fail_free_state;
  }
  ci_log("event channel is %u", bec->channel);

  /* Create xenbus net event channel */
  err = xenbus_alloc_evtchn(dev, &bec->net_channel);
  if (err) {
    xenbus_dev_fatal(dev, err, "allocating net event channel");
    goto fail_free_state;
  }
  ci_log("net event channel is %u", bec->net_channel);

  /* Order 1: two contiguous pages, control page then message page. */
  bec->shared_page = (struct ef_shared_page *)__get_free_pages(GFP_KERNEL, 1);
  if (bec->shared_page == NULL) {
    err = -ENOMEM;
    xenbus_dev_fatal(dev, err, "allocating shared page");
    goto fail_free_state;
  }

  err = make_named_grant(dev, bec->shared_page, "ctrl-page");
  if (err)
    goto fail;

  err = make_named_grant(dev, (__u8*)bec->shared_page + PAGE_SIZE, "msg-page");
  if (err)
    goto fail;

  /* The message page is split in half: first half for messages from dom0,
     second half for messages to dom0. */
  msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct ef_msg);

  ef_msg_init_queue(&bec->from_dom0, &bec->shared_page->queue0, 
                    (struct ef_msg *)((__u8*)bec->shared_page + PAGE_SIZE),
                    msgs_per_queue);

  ef_msg_init_queue(&bec->to_dom0, &bec->shared_page->queue1,
                    (struct ef_msg *)((__u8*)bec->shared_page + (3 * PAGE_SIZE / 2)),
                    msgs_per_queue);

  /* Publish both event channels in one transaction, retrying when the
     commit reports -EAGAIN. */
  do {
    err = xenbus_transaction_start(&tr);
    if (err)
      goto fail;
    err = xenbus_printf(tr, dev->nodename, "event-channel", "%u", bec->channel);
    if (err == 0)
      err = xenbus_printf(tr, dev->nodename, "net-channel", "%u",
                          bec->net_channel);
    if (err) {
      /* Don't leave the transaction open on a failed write. */
      xenbus_transaction_end(tr, 1);
      goto fail;
    }
    err = xenbus_transaction_end(tr, 0);
  } while (err == -EAGAIN);
  if (err)
    goto fail;

  /* And the shared memory window for Rx packets */
  bec->rcv_pkts.fifo = alloc_rcv_mem(bec);
  if (bec->rcv_pkts.fifo == NULL) {
    ci_log("Failed to allocate rx memory window.");
    /* err is 0 at this point; make sure failure is actually reported. */
    err = -ENOMEM;
    goto fail;
  }
  sh_fifo2_init(&bec->rcv_pkts, bec->rcv_pages * PAGE_SIZE - 1,
                &bec->shared_page->rcv_pkts_rd, 
                &bec->shared_page->rcv_pkts_wr);

  /* And the shared memory window for Tx packets */
  bec->snd_pkts.fifo = alloc_snd_mem(bec);
  if (bec->snd_pkts.fifo == NULL) {
    ci_log("Failed to allocate tx memory window.");
    err = -ENOMEM;
    goto fail;
  }
  sh_fifo2_init(&bec->snd_pkts, bec->snd_pages * PAGE_SIZE - 1,
                &bec->shared_page->snd_pkts_rd, 
                &bec->shared_page->snd_pkts_wr);
  ci_log("tx FIFO for BEC %p is at %p", bec, bec->snd_pkts.fifo);

  /* Returns the (non-negative) IRQ number on success. */
  err = bind_evtchn_to_irqhandler(bec->channel, irq_from_bend,
                                  SA_SAMPLE_RANDOM, "vnicfront", dev);
  if (err < 0)
    goto fail;

  return 0;

fail:
  xenbus_dev_fatal(dev, err, "setting up shared state");
fail_free_state:
  /* The FIFO pages have not been granted to anyone yet (that happens when
     the backend's SETHW message is handled), so they can be freed here. */
  free_byte_fifo_mem(bec->snd_pkts.fifo, bec->snd_page_list, bec->snd_pages);
  free_byte_fifo_mem(bec->rcv_pkts.fifo, bec->rcv_page_list, bec->rcv_pages);
  /* No IRQ handler is bound at this point, so nothing else can reach bec. */
  dev->dev.driver_data = NULL;
  kfree(vnic_all);
  return err;
}

/* Read the backend's "state" node from xenstore.
 * Returns non-zero only if the read succeeded and the backend is in
 * XenbusStateInitialised. */
static int backend_initialised(struct xenbus_device *dev)
{
  int state;

  if (xenbus_gather(XBT_NIL, dev->otherend, "state", "%d", &state, NULL) != 0)
    return 0;

  return state == XenbusStateInitialised;
}

/* xenbus probe callback.  If the backend is already initialised, set up
 * the shared state now and move to Initialised (or Closing on error);
 * otherwise do nothing and leave setup to vnic_bend_changed().
 * Returns 0, or the error from vnic_setup_shared_state(). */
int vnic_probe(struct xenbus_device *dev,
        const struct xenbus_device_id *id)
{
  int err;

  ci_log("Probe passed device %s", dev->nodename);

  if (!backend_initialised(dev))
    return 0;

  err = vnic_setup_shared_state(dev);
  if (err)
    xenbus_switch_state(dev, XenbusStateClosing);
  else
    xenbus_switch_state(dev, XenbusStateInitialised);

  return err;
}

/* xenbus resume callback: nothing is done on resume; always reports
 * success. */
int vnic_resume(struct xenbus_device *dev)
{
  const int ok = 0;

  return ok;
}

/* xenbus remove callback: tear down the OS-dependent part of the VNIC and
 * detach our state from the device.  Always returns 0.
 *
 * driver_data is NULL when the shared state was never set up (the backend
 * was not initialised at probe time) or when setup failed; there is then
 * nothing to remove.
 * TODO: the state allocated by vnic_setup_shared_state() is not freed
 * here. */
int vnic_remove(struct xenbus_device *dev)
{
  ef_vnic_bec *bec = (ef_vnic_bec *)dev->dev.driver_data;

  if (bec == NULL)
    return 0;

  ef_vnic_osd_remove(EF_VNIC_OSD_FROM_BEC(bec));

  /* The state of the Xenbus device will persist, we don't want to find
   * this pointer if we are reloaded. */
  dev->dev.driver_data = NULL;
  return 0;
}

/* xenbus callback for a change in the backend's state.  When the backend
 * reports Initialised or Connected and we have no shared state yet, set it
 * up and move to Initialised (or Closing on error).  All other backend
 * states are only logged. */
void vnic_bend_changed(struct xenbus_device *dev, XenbusState bend_state)
{
  int err;

  ci_log("vnic_bend_changed called, state now %d.", bend_state);

  if (bend_state != XenbusStateInitialised &&
      bend_state != XenbusStateConnected) {
    ci_log("who cares?");
    return;
  }

  if (bend_state == XenbusStateInitialised)
    ci_log("backend to XenbusStateInitialised");

  /* Already set up: nothing more to do for either state. */
  if (dev->dev.driver_data != NULL)
    return;

  if (bend_state == XenbusStateConnected)
    ci_log("Backend reckons it's connected, but we aren't! Must be a reload");

  err = vnic_setup_shared_state(dev);
  xenbus_switch_state(dev, err ?  XenbusStateClosing : XenbusStateInitialised);
}

/* Unposting buffers is not supported by this SVH: log that fact, then tell
 * the OS-dependent layer that everything has been unposted. */
void ef_vnic_svh_unpost_all(ef_vnic_svh *svh)
{
  ci_log("%s unsupported.", __FUNCTION__);

  ef_vnic_osd_unposted_all(EF_VNIC_OSD_FROM_SVH(svh));
}

/* Disable network interrupts by masking the network event channel. */
void ef_vnic_bec_disable_interrupts(ef_vnic_bec *bec)
{
  mask_evtchn(bec->net_channel);

  return;
}

/* Re-enable network interrupts by unmasking the network event channel. */
void ef_vnic_bec_enable_interrupts(ef_vnic_bec *bec)
{
  unmask_evtchn(bec->net_channel);

  return;
}
