/**************************************************************************\
*//*! \file ef_vnic_osd.c VNIC Operating system driver

Copyright 2006 Solarflare Communications Inc,
               9501 Jeronimo Road, Suite 250,
               Irvine, CA 92618, USA

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License version 2 as published by the Free
Software Foundation, incorporated herein by reference.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*//*
\**************************************************************************/
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in.h>
#include <linux/crc32.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <ci/driver/virtual/vnic.h>
#include <ci/tools/profile.h>


/*** Constants ***********************************************************/

/* Reasons for which the interface may be held stopped.  Each reason is
 * a distinct bit so that several can be outstanding at once in
 * osd->stop_reasons; ef_vnic_osd_start() only restarts the interface
 * once no bits remain set. */
#define OSD_STOP_REASON_SUSPEND 1  /* ef_vnic_osd_suspend()/_resume() */
#define OSD_STOP_REASON_REMOVE  2  /* ef_vnic_osd_remove() */
#define OSD_STOP_REASON_CLOSED  4  /* net_device open/stop callbacks */
#define OSD_STOP_REASON_RESET   8  /* ef_vnic_osd_reset_work() */


/* FIXME: this is purely for statistics */
#include "ef_vnic_bufs.h"

/* Declare a read-only class device attribute called <name>.  A function
 * <name>_show() must already be defined. */
#define EF_ATTR_RO(name) \
  static struct class_device_attribute name = \
    __ATTR_RO(name)

/* Declare a read/write (mode 0644) class device attribute called
 * <name>, backed by <name>_show() and <name>_store(). */
#define EF_ATTR_RW(name) \
  static struct class_device_attribute name = \
    __ATTR(name, 0644, name##_show, name##_store)

/* Declare a write-only (mode 0200) class device attribute called
 * <name>, backed by <name>_store().  Reads are routed to
 * ef_attr_null_show(), which fails with -EINVAL. */
#define EF_ATTR_WO(name) \
  static struct class_device_attribute name = \
    __ATTR(name, 0200, ef_attr_null_show, name##_store)

static  ef_vnic_svh *class_dev_to_svh(struct class_device *dev)
{
  struct net_device *net = container_of(dev, struct net_device, class_dev);
  ef_vnic_osd *osd = (ef_vnic_osd *)net->priv;
  return EF_VNIC_SVH_FROM_OSD(osd);
}

static  ef_vnic_osd *class_dev_to_osd(struct class_device *dev)
{
  struct net_device *net = container_of(dev, struct net_device, class_dev);
  ef_vnic_osd *osd = (ef_vnic_osd *)net->priv;
  return osd;
}

/* Show handler installed by EF_ATTR_WO for write-only attributes: every
 * read attempt fails with -EINVAL and nothing is written to buf. */
static ssize_t ef_attr_null_show(struct class_device *dev, char *buf)
{
  const ssize_t rc = -EINVAL;

  return rc;
}


static ssize_t fast_rx_bytes_show(struct class_device *dev, char * buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  return snprintf(buf, PAGE_SIZE, "%d\n", svh->fastpath_rx_bytes);
}
EF_ATTR_RO(fast_rx_bytes);


static ssize_t fast_rx_frames_show(struct class_device *dev, char * buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  return snprintf(buf, PAGE_SIZE, "%d\n", svh->fastpath_rx_pkts);
}
EF_ATTR_RO(fast_rx_frames);


static ssize_t fast_tx_bytes_show(struct class_device *dev, char * buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  return snprintf(buf, PAGE_SIZE, "%d\n", svh->fastpath_tx_bytes);
}
EF_ATTR_RO(fast_tx_bytes);


static ssize_t fast_tx_frames_show(struct class_device *dev, char * buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  return snprintf(buf, PAGE_SIZE, "%d\n", svh->fastpath_tx_pkts);
}
EF_ATTR_RO(fast_tx_frames);


/* sysfs "fast_tx_pending" (read-only): prints the number of fastpath
 * packets sent minus the number completed, clamped so that it is never
 * reported as negative. */
static ssize_t fast_tx_pending_show(struct class_device *dev, char *buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  int outstanding;

  outstanding = svh->fastpath_tx_pkts - svh->fastpath_tx_completions;
  return snprintf(buf, PAGE_SIZE, "%d\n",
                  (outstanding < 0) ? 0 : outstanding);
}
EF_ATTR_RO(fast_tx_pending);


static ssize_t fast_tx_pending_max_show(struct class_device *dev, char * buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  int result = svh->fastpath_tx_pending_max;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
EF_ATTR_RO(fast_tx_pending_max);


static ssize_t fast_tx_busy_show(struct class_device *dev, char * buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  int result = svh->fastpath_tx_busy;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
EF_ATTR_RO(fast_tx_busy);


/* sysfs "rx_prealloc" (read-only): prints how many pre-allocated
 * sk_buffs are currently queued on osd->rx_skb_list. */
static ssize_t rx_prealloc_show(struct class_device *dev, char *buf)
{
  ef_vnic_osd *osd = class_dev_to_osd(dev);
  int queued;

  queued = skb_queue_len(&osd->rx_skb_list);
  return snprintf(buf, PAGE_SIZE, "%d\n", queued);
}
EF_ATTR_RO(rx_prealloc);


/* sysfs "rx_prealloc_min" (read-only): prints osd->rx_min_fill, the
 * lowest fill level of the pre-allocated RX list recorded by
 * ef_vnic_osd_new_rx_buffer() since the statistics were last reset. */
static ssize_t rx_prealloc_min_show(struct class_device *dev, char * buf)
{
  ef_vnic_osd *osd = class_dev_to_osd(dev);
  /* Unsigned so that the argument matches the "%u" conversion; the
   * field is reset to ~0, which is not representable as an int. */
  unsigned int result = osd->rx_min_fill;
  return snprintf(buf, PAGE_SIZE, "%u\n", result);
}
EF_ATTR_RO(rx_prealloc_min);


static ssize_t rx_prealloc_max_show(struct class_device *dev, char * buf)
{
  ef_vnic_osd *osd = class_dev_to_osd(dev);
  int result = osd->rx_max_fill;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
/* sysfs "rx_prealloc_max" write handler: parses a decimal integer from
 * buf and stores it in osd->rx_max_fill.
 *
 * Returns len on success, or -EINVAL if buf does not start with a
 * parsable integer (in which case rx_max_fill is left untouched). */
static ssize_t rx_prealloc_max_store(struct class_device *dev,
                                     const char * buf,
                                     size_t len)
{
  ef_vnic_osd *osd = class_dev_to_osd(dev);
  int result;

  /* Without this check a failed parse would store an uninitialised
   * stack value. */
  if(sscanf(buf, "%d", &result) != 1)
    return -EINVAL;

  osd->rx_max_fill = result;
  return len;
}
EF_ATTR_RW(rx_prealloc_max);


static ssize_t rx_fast_alloc_show(struct class_device *dev, char * buf)
{
  ef_vnic_osd *osd = class_dev_to_osd(dev);
  int result = osd->fastpath_rx_alloc;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
EF_ATTR_RO(rx_fast_alloc);


static ssize_t rx_refills_show(struct class_device *dev, char * buf)
{
  ef_vnic_osd *osd = class_dev_to_osd(dev);
  int result = osd->rx_refills;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
EF_ATTR_RO(rx_refills);


static ssize_t fast_rx_dma_level_show(struct class_device *dev, char * buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  int result = svh->rx_dma_level;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
EF_ATTR_RO(fast_rx_dma_level);


static ssize_t fast_rx_dma_max_show(struct class_device *dev, char * buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  int result = svh->rx_dma_max;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
/* sysfs "fast_rx_dma_max" write handler: parses a decimal integer from
 * buf and stores it in svh->rx_dma_max.
 *
 * Returns len on success, or -EINVAL if buf does not start with a
 * parsable integer (in which case rx_dma_max is left untouched). */
static ssize_t fast_rx_dma_max_store(struct class_device *dev,
                                     const char * buf,
                                     size_t len)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  int result;

  /* Without this check a failed parse would store an uninitialised
   * stack value. */
  if(sscanf(buf, "%d", &result) != 1)
    return -EINVAL;

  svh->rx_dma_max = result;
  return len;
}
EF_ATTR_RW(fast_rx_dma_max);


static ssize_t fast_buffers_used_show(struct class_device *dev, char * buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  return snprintf(buf, PAGE_SIZE, "%d\n",
                  ef_vnic_buf_nused(svh->bufs));
}
EF_ATTR_RO(fast_buffers_used);


static ssize_t fast_buffers_count_show(struct class_device *dev, char * buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  return snprintf(buf, PAGE_SIZE, "%d\n",
                  ef_vnic_buf_ntotal(svh->bufs));
}
EF_ATTR_RO(fast_buffers_count);


static ssize_t irq_count_show(struct class_device *dev, char * buf)
{
  ef_vnic_osd *osd = class_dev_to_osd(dev);
  int result = osd->irq_count;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
EF_ATTR_RO(irq_count);


static ssize_t poll_schedule_count_show(struct class_device *dev, char * buf)
{
  ef_vnic_osd *osd = class_dev_to_osd(dev);
  int result = osd->poll_schedule_count;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
EF_ATTR_RO(poll_schedule_count);


static ssize_t poll_call_count_show(struct class_device *dev, char * buf)
{
  ef_vnic_osd *osd = class_dev_to_osd(dev);
  int result = osd->poll_call_count;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
EF_ATTR_RO(poll_call_count);


static ssize_t poll_reschedule_count_show(struct class_device *dev, char * buf)
{
  ef_vnic_osd *osd = class_dev_to_osd(dev);
  int result = osd->poll_reschedule_count;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
EF_ATTR_RO(poll_reschedule_count);


static ssize_t event_count_show(struct class_device *dev, char * buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  int result = svh->event_count;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
EF_ATTR_RO(event_count);


static ssize_t events_per_irq_max_show(struct class_device *dev, char * buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  int result = svh->events_per_irq_max;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
EF_ATTR_RO(events_per_irq_max);


static ssize_t useless_irq_count_show(struct class_device *dev, char * buf)
{
  ef_vnic_osd *osd = class_dev_to_osd(dev);
  int result = osd->useless_irq_count;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
EF_ATTR_RO(useless_irq_count);


static ssize_t fast_rx_pkt_stride_show(struct class_device *dev, char * buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  int result = svh->rx_pkt_stride;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
/* sysfs "fast_rx_pkt_stride" write handler: parses a decimal integer
 * from buf and stores it in svh->rx_pkt_stride.
 *
 * Returns len on success, or -EINVAL if buf does not start with a
 * parsable integer (in which case rx_pkt_stride is left untouched). */
static ssize_t fast_rx_pkt_stride_store(struct class_device *dev,
                                        const char * buf,
                                        size_t len)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  int result;

  /* Without this check a failed parse would store an uninitialised
   * stack value. */
  if(sscanf(buf, "%d", &result) != 1)
    return -EINVAL;

  svh->rx_pkt_stride = result;
  return len;
}
EF_ATTR_RW(fast_rx_pkt_stride);


static ssize_t fast_rx_skb_stride_show(struct class_device *dev, char * buf)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  int result = svh->rx_skb_stride;
  return snprintf(buf, PAGE_SIZE, "%d\n", result);
}
/* sysfs "fast_rx_skb_stride" write handler: parses a decimal integer
 * from buf and stores it in svh->rx_skb_stride.
 *
 * Returns len on success, or -EINVAL if buf does not start with a
 * parsable integer (in which case rx_skb_stride is left untouched). */
static ssize_t fast_rx_skb_stride_store(struct class_device *dev,
                                        const char * buf,
                                        size_t len)
{
  ef_vnic_svh *svh = class_dev_to_svh(dev);
  int result;

  /* Without this check a failed parse would store an uninitialised
   * stack value. */
  if(sscanf(buf, "%d", &result) != 1)
    return -EINVAL;

  svh->rx_skb_stride = result;
  return len;
}
EF_ATTR_RW(fast_rx_skb_stride);


/* sysfs "reset_stats" (write-only): any write clears the statistics
 * counters exposed in this attribute group.  The written data itself is
 * ignored.  rx_min_fill is a low-water mark, so it is reset to all-ones
 * rather than zero.  Always returns len. */
static ssize_t reset_stats_store(struct class_device *dev,
                                 const char *buf,
                                 size_t len)
{
  ef_vnic_osd *osd = class_dev_to_osd(dev);
  ef_vnic_svh *svh = class_dev_to_svh(dev);

  /* Counters held in the SVH. */
  svh->fastpath_rx_bytes       = 0;
  svh->fastpath_rx_pkts        = 0;
  svh->fastpath_tx_bytes       = 0;
  svh->fastpath_tx_pkts        = 0;
  svh->fastpath_tx_pending_max = 0;
  svh->fastpath_tx_busy        = 0;
  svh->event_count             = 0;
  svh->events_per_irq_max      = 0;

  /* Counters held in the OSD. */
  osd->rx_min_fill             = ~0;
  osd->fastpath_rx_alloc       = 0;
  osd->rx_refills              = 0;
  osd->irq_count               = 0;
  osd->poll_schedule_count     = 0;
  osd->poll_call_count         = 0;
  osd->poll_reschedule_count   = 0;
  osd->useless_irq_count       = 0;

  return len;
}
EF_ATTR_WO(reset_stats);


/* NULL-terminated list of every attribute published in the "vmaccel"
 * sysfs group (see accel_attrg).  Each entry refers to a
 * class_device_attribute declared with EF_ATTR_RO/RW/WO above. */
static struct attribute *accel_attrs[] = {
  &fast_rx_bytes.attr,
  &fast_rx_frames.attr,
  &fast_tx_bytes.attr,
  &fast_tx_frames.attr,
  &fast_tx_pending.attr,
  &fast_tx_pending_max.attr,
  &fast_tx_busy.attr,
  &rx_prealloc.attr,
  &rx_prealloc_min.attr,
  &rx_prealloc_max.attr,
  &rx_fast_alloc.attr,
  &rx_refills.attr,
  &fast_rx_dma_level.attr,
  &fast_rx_dma_max.attr,
  &fast_buffers_used.attr,
  &fast_buffers_count.attr,
  &reset_stats.attr,
  &irq_count.attr,
  &poll_schedule_count.attr,
  &poll_call_count.attr,
  &poll_reschedule_count.attr,
  &event_count.attr,
  &events_per_irq_max.attr,
  &useless_irq_count.attr,
  &fast_rx_pkt_stride.attr,
  &fast_rx_skb_stride.attr,
  NULL};


/* Attribute group holding accel_attrs under the name "vmaccel".  It is
 * attached to the net_device's class device kobject in
 * ef_vnic_osd_probe() and detached in ef_vnic_osd_remove(). */
static struct attribute_group accel_attrg = {
  .name = "vmaccel",
  .attrs = accel_attrs
};

/*** Helper functions ****************************************************/

/* Obtain a receive buffer with at least len bytes of tailroom.
 *
 * A buffer is taken from the pre-allocated list osd->rx_skb_list when
 * one is available; a queued buffer that is too small is logged and
 * freed.  Whenever the list was consulted, the low-water mark
 * osd->rx_min_fill is updated.  If no suitable buffer was obtained, one
 * is allocated with GFP_ATOMIC and osd->fastpath_rx_alloc is bumped
 * (even if that allocation fails).
 *
 * Returns the buffer, or NULL if the fallback allocation failed. */
ef_vnic_pktbuff *ef_vnic_osd_new_rx_buffer(ef_vnic_osd *osd, int len)
{
  struct sk_buff *buf = NULL;
  ci_uint32 queued;

  if(CI_LIKELY(!skb_queue_empty(&osd->rx_skb_list))) {
    buf = skb_dequeue(&osd->rx_skb_list);
    if(skb_tailroom(buf) < len) {
      ci_log("%s: This sk_buff is no good.  %d<%d",
             __FUNCTION__, skb_tailroom(buf), len);
      dev_kfree_skb_any(buf);
      buf = NULL;
    }

    /* Track the lowest fill level seen so far. */
    queued = skb_queue_len(&osd->rx_skb_list);
    if ( queued < osd->rx_min_fill ) {
      osd->rx_min_fill = queued;
      VNIC_VERB(ci_log("%s: Minimum fill level is %d", __FUNCTION__, queued));
    }
  }

  if(CI_LIKELY(buf != NULL))
    return buf;

  /* Nothing usable was queued: allocate on the spot.  NET_IP_ALIGN
   * extra bytes are requested but no headroom is reserved. */
  buf = alloc_skb(len+NET_IP_ALIGN, GFP_ATOMIC);
  osd->fastpath_rx_alloc++;
  return buf;
}

/* Top up the pre-allocated RX buffer list until it holds
 * osd->rx_max_fill buffers or an allocation fails.
 *
 * Allocations use GFP_KERNEL.  osd->rx_fill_mutex is taken with
 * down_trylock(), so if another refill is already in progress this call
 * returns immediately without doing anything. */
static void ef_vnic_osd_alloc_rx_slow(ef_vnic_osd *osd)
{
  struct sk_buff *fresh;
  ci_uint32 queued;

  /* Only one refill may run at a time. */
  if(down_trylock(&osd->rx_fill_mutex) != 0)
    return;

  osd->rx_refills++;

  for(;;) {
    queued = skb_queue_len(&osd->rx_skb_list);

    VNIC_VERB(ci_log("%s: Fill level is %d", __FUNCTION__, queued));

    /* Stop once enough buffers are queued. */
    if ( queued >= osd->rx_max_fill ) {
      VNIC_VERB(ci_log("%s: Quick return", __FUNCTION__));
      break;
    }

    /* FIXME: What level should be used here? */
    fresh = alloc_skb(NET_IP_ALIGN+ETH_HLEN+EF_VNIC_INF_FROM_OSD(osd)->mtu,
                      GFP_KERNEL);
    if ( fresh == NULL ) {
      ci_log("%s: Erp!  No memory!", __FUNCTION__);
      break;
    }

    VNIC_VERB(ci_log("%s: Queueing skb %p", __FUNCTION__, fresh));
    /* No headroom is reserved in the new buffer. */
    skb_queue_head(&osd->rx_skb_list, fresh);
  }

  up(&osd->rx_fill_mutex);
}

/* Stop the interface for the given reason (one OSD_STOP_REASON_* bit).
 *
 * The reason is recorded in osd->stop_reasons.  If the interface was
 * already stopped for some other reason nothing else is done.
 * Otherwise transmission is disabled, posted packets are reclaimed,
 * any spare TX packet is dropped, interrupt delivery is disabled, the
 * poll routine is flushed and the carrier is marked down.
 *
 * Calls down() on osd->unpost_sem, which is released by
 * ef_vnic_osd_unposted_all(). */
static void ef_vnic_osd_stop(ef_vnic_osd *osd, unsigned int reason)
{
  int stop_reasons;
  /* spin_lock_irqsave() saves the IRQ state into an unsigned long. */
  unsigned long flags;

  ci_log("%s(%d) called (stop_reasons=%d)" , __FUNCTION__, reason,
         osd->stop_reasons);

  /* We shouldn't be stopped for this reason. */
  ci_assert_equal((osd->stop_reasons | ~reason), ~reason);

  /* Record the new reason, remembering what was set before. */
  spin_lock_irqsave(&osd->stop_lock, flags);
  stop_reasons = osd->stop_reasons;
  osd->stop_reasons |= reason;
  spin_unlock_irqrestore(&osd->stop_lock, flags);

  /* Nothing more if we were already stopped. */
  if(stop_reasons != 0) {
    ci_log("%s(%d) Quick return (stop_reasons=%d)" , __FUNCTION__, reason,
           osd->stop_reasons);
    return;
  }

  /* Stop all sends.  They will not be restarted because stop_reasons
   * is non-zero. */
  netif_tx_disable(osd->net_dev);

  /* Reclaim all packets.  No more packets will be posted because
   * stop_reasons is non-zero. */

  ef_vnic_svh_unpost_all(EF_VNIC_SVH_FROM_OSD(osd));
  down(&osd->unpost_sem);

  /* Drop the spare packet, if any.  We're expecting to lose packets
   * in any case. */
  if(osd->tx_skb) {
    dev_kfree_skb_any(osd->tx_skb);
    osd->tx_skb = NULL;
    osd->stats.tx_aborted_errors++;
  }

  /* Disable interrupt delivery and stop polling. */
  spin_lock_irqsave(&osd->stop_lock, flags);
  osd->irq_enabled = CI_FALSE;
  ef_vnic_bec_disable_interrupts(EF_VNIC_BEC_FROM_OSD(osd));
  spin_unlock_irqrestore(&osd->stop_lock, flags);

  /* This terminates the poll up to netif_rx_complete.  Since
   * irq_enabled is now false, the final code after that is inert.  We
   * need to enable polling again since polling disabled looks the
   * same as polling active and unregistering the interface waits
   * until polling is inactive and hence enabled. */
  netif_poll_disable(osd->net_dev);
  netif_poll_enable (osd->net_dev);

  /* The link is now down. */
  netif_carrier_off(osd->net_dev);

  ci_log("%s(%d) returning (stop_reasons=%d)" , __FUNCTION__, reason,
         osd->stop_reasons);

  /* Nobody should have started for this reason.  This ensures that
   * nobody can be starting at the same time since this stop reason is
   * still outstanding. */
  ci_assert_equal((osd->stop_reasons & reason), reason);
}

/* Withdraw one stop reason (an OSD_STOP_REASON_* bit) and, if no
 * reasons remain, bring the interface back into operation.
 *
 * Restarting marks the carrier up if the link is up, enables interrupt
 * delivery (or processes an already-pending interrupt directly), wakes
 * the TX queue and pre-fills the RX buffer list. */
static void ef_vnic_osd_start(ef_vnic_osd *osd, unsigned int reason)
{
  /* spin_lock_irqsave() saves the IRQ state into an unsigned long. */
  unsigned long flags;

  ci_log("%s(%d) called (stop_reasons=%d)" , __FUNCTION__, reason,
         osd->stop_reasons);

  /* We should be stopped for this reason. */
  ci_assert_equal((osd->stop_reasons & reason), reason);

  /* Clear this reason.  Continue with the lock held if no reasons
   * remain, so we don't race with ef_vnic_osd_stop. */
  spin_lock_irqsave(&osd->stop_lock, flags);
  osd->stop_reasons &= ~reason;
  if(osd->stop_reasons != 0) {
    spin_unlock_irqrestore(&osd->stop_lock, flags);
    ci_log("%s(%d) Quick return (stop_reasons=%d)" , __FUNCTION__, reason,
           osd->stop_reasons);
    return;
  }

  /* Mark the link as up if necessary. */
  if(EF_VNIC_INF_FROM_OSD(osd)->is_up)
    netif_carrier_on(osd->net_dev);

  /* Enable NAPI polling and enable interrupt delivery. */
  osd->irq_enabled = CI_TRUE;
  ci_wmb();
  if(ef_vnic_svh_enable_interrupts(EF_VNIC_SVH_FROM_OSD(osd))) {
    ef_vnic_bec_enable_interrupts(EF_VNIC_BEC_FROM_OSD(osd));
  } else {
    /* Work is already waiting: handle it as if an interrupt fired. */
    ci_log("%s() Interrupt pending", __FUNCTION__);
    ef_vnic_osd_interrupt(osd);
  }

  /* Start TX. */
  netif_wake_queue(osd->net_dev);

  /* It's now safe to drop the lock. */
  spin_unlock_irqrestore(&osd->stop_lock, flags);

  /* Pre-allocate some RX buffers to decrease latency. */
  ef_vnic_osd_alloc_rx_slow(osd);

  ci_log("%s(%d) returning (stop_reasons=%d)" , __FUNCTION__, reason,
         osd->stop_reasons);

  /* We shouldn't be stopped for this reason. */
  ci_assert_equal((osd->stop_reasons | ~reason), ~reason);
}


/* Hand one packet to the SVH for transmission.
 *
 * Before posting, if the SVH reports outstanding interrupt work and the
 * NAPI poll can be scheduled, back-end interrupts are disabled and the
 * poll is scheduled straight away.
 *
 * Returns CI_TRUE when the packet has been dealt with (posted, or
 * rejected and freed with tx_fifo_errors bumped).  Returns CI_FALSE
 * when the SVH was busy; the packet is then parked in osd->tx_skb for a
 * later retry.  osd->tx_skb must be NULL on entry. */
static ci_boolean_t ef_vnic_osd_xmit(ef_vnic_osd *osd,
                                     struct sk_buff *skb)
{
  enum ef_vnic_post_status post_result;

  ci_assert_equal(osd->tx_skb, NULL);

  if(ef_vnic_svh_check_interrupts(EF_VNIC_SVH_FROM_OSD(osd)) &&
     netif_rx_schedule_prep(osd->net_dev)) {
    /* Only the back-end interrupt source is disabled here. */
    ef_vnic_bec_disable_interrupts(EF_VNIC_BEC_FROM_OSD(osd));
    VNIC_VERB(ci_log("No need to wait for an interrupt."));
    osd->poll_schedule_count++;
    __netif_rx_schedule(osd->net_dev);
  }

  /* Pass the packet to the hardware. */
  post_result = ef_vnic_svh_tx_post(EF_VNIC_SVH_FROM_OSD(osd), skb);

  if(CI_LIKELY(post_result == EF_VNIC_STATUS_GOOD)) {
    osd->net_dev->trans_start = jiffies;
    return CI_TRUE;
  }

  if(post_result == EF_VNIC_STATUS_BUSY) {
    /* We will handle it later. */
    osd->tx_skb = skb;
    return CI_FALSE;
  }

  /* We don't like this packet for some reason. */
  dev_kfree_skb_any(skb);
  osd->stats.tx_fifo_errors++;
  return CI_TRUE;
}

/* Tell the SVH that the MTU recorded by ef_vnic_osd_change_mtu() has
 * changed, and acknowledge the change request.
 *
 * The mtu_changed flag is cleared here because nothing else in this
 * file clears it; left set, every subsequent poll would repeat this
 * slow path.  It is cleared before notifying the SVH so that a change
 * requested while the notification is in progress sets the flag again
 * and is picked up by the next poll. */
static void ef_vnic_osd_update_mtu(ef_vnic_osd *osd)
{
  osd->mtu_changed = CI_FALSE;
  ef_vnic_svh_updated_mtu(EF_VNIC_SVH_FROM_OSD(osd));
}

static void ef_vnic_osd_reset_work(void *data) {
  ef_vnic_osd *osd = data;
  ci_log("%s(%p) called", __FUNCTION__, osd);
  down(&osd->reset_sem);
  ef_vnic_osd_stop(osd, OSD_STOP_REASON_RESET);
  ef_vnic_osd_start(osd, OSD_STOP_REASON_RESET);
  up(&osd->reset_sem);
  ci_log("%s(%p) returning", __FUNCTION__, osd);
}

static void ef_vnic_osd_rx_refill_work(void *data) {
  ef_vnic_osd *osd = data;
  VNIC_VERB(ci_log("%s(%p) called", __FUNCTION__, osd));
  /* Top up the pre-allocated buffer list. */
  ef_vnic_osd_alloc_rx_slow(osd);
}

/*** sysfs ***************************************************************/

/* Recover the class_device that embeds the given kobject. */
#define to_class_dev(obj) container_of(obj,struct class_device,kobj)
/* Recover the net_device that embeds the given class_device. */
#define to_net_dev(class) container_of(class, struct net_device, class_dev)

/* Returns 1 if the device is in the NETREG_REGISTERED state, else 0. */
static inline int dev_isalive(const struct net_device *dev)
{
	if (dev->reg_state == NETREG_REGISTERED)
		return 1;
	return 0;
}

/* Common helper for net_device show handlers.  Takes dev_base_lock for
 * reading (the same locking rules as the GIF* ioctls) and, provided the
 * device is still registered, lets the supplied formatter write into
 * buf.  Returns the formatter's result, or -EINVAL if the device is not
 * alive. */
static ssize_t netdev_show(const struct class_device *cd, char *buf,
			   ssize_t (*format)(const struct net_device *, char *))
{
	struct net_device *netdev = to_net_dev(cd);
	ssize_t written;

	read_lock(&dev_base_lock);
	written = dev_isalive(netdev) ? (*format)(netdev, buf) : -EINVAL;
	read_unlock(&dev_base_lock);

	return written;
}

/* Format string for a signed decimal value followed by a newline. */
static const char fmt_dec[] = "%d\n";

/* Formatter for netdev_show(): writes the device's reference count to
 * buf in decimal and returns the number of characters written. */
static ssize_t format_refcnt(const struct net_device *net_dev, char *buf)
{
  const int refs = atomic_read(&net_dev->refcnt);

  return sprintf(buf, fmt_dec, refs);
}
/* Show handler for the "refcnt" class device attribute; delegates to
 * netdev_show() with format_refcnt() as the formatter. */
static ssize_t show_refcnt(struct class_device *cd, char *buf)
{
  ssize_t written;

  written = netdev_show(cd, buf, &format_refcnt);
  return written;
}

/* FIXME: We shouldn't really be exporting this. */
/* Generate a read-only (S_IRUGO) network device class attribute named
 * "refcnt", served by show_refcnt().  The macro defines the object
 * class_device_attr_refcnt, which ef_vnic_osd_probe() registers. */
static CLASS_DEVICE_ATTR(refcnt, S_IRUGO, show_refcnt, NULL);


/*** Net driver functions ************************************************/

/* net_device open callback: withdraws the CLOSED stop reason so the
 * interface can run once no other reasons remain.  Always returns 0. */
static int ef_vnic_osd_open(struct net_device *net_dev)
{
  ci_log("%s(%p) called", __FUNCTION__, net_dev);
  ef_vnic_osd_start(net_dev->priv, OSD_STOP_REASON_CLOSED);
  return 0;
}

/* net_device stop callback: stops the interface with the CLOSED stop
 * reason.  Always returns 0. */
static int ef_vnic_osd_close(struct net_device *net_dev)
{
  ci_log("%s(%p) called", __FUNCTION__, net_dev);
  ef_vnic_osd_stop(net_dev->priv, OSD_STOP_REASON_CLOSED);
  return 0;
}

/* net_device ioctl callback.  No device-specific ioctls are
 * implemented, so every request is refused with -EOPNOTSUPP.
 *
 * FIXME: We could support some MII registers to report the link
 * speed. */
static int ef_vnic_osd_ioctl(struct net_device *net_dev,
                             struct ifreq *ifr, int cmd)
{
  const int rc = -EOPNOTSUPP;

  return rc;
}

/* net_device get_stats callback.  Recomputes the aggregate rx_errors
 * and tx_errors totals from the individual error counters kept in
 * osd->stats and returns a pointer to that structure. */
static struct net_device_stats *
ef_vnic_osd_get_stats(struct net_device *net_dev)
{
  ef_vnic_osd *osd = net_dev->priv;
  struct net_device_stats *st = &osd->stats;
  unsigned long rx_total;
  unsigned long tx_total;

  rx_total  = st->rx_length_errors;
  rx_total += st->rx_over_errors;
  rx_total += st->rx_crc_errors;
  rx_total += st->rx_frame_errors;
  rx_total += st->rx_fifo_errors;
  rx_total += st->rx_missed_errors;
  st->rx_errors = rx_total;

  tx_total  = st->tx_aborted_errors;
  tx_total += st->tx_carrier_errors;
  tx_total += st->tx_fifo_errors;
  tx_total += st->tx_heartbeat_errors;
  tx_total += st->tx_window_errors;
  st->tx_errors = tx_total;

  return st;
}

/* net_device hard_start_xmit callback.
 *
 * Returns NETDEV_TX_BUSY without taking the packet if a previously
 * deferred packet is still parked in osd->tx_skb.  Otherwise the packet
 * is passed to ef_vnic_osd_xmit(); if that defers it, the TX queue is
 * stopped.  In both of those cases NETDEV_TX_OK is returned. */
static int ef_vnic_osd_hard_start_xmit(struct sk_buff *skb,
                                       struct net_device *net_dev)
{
  ef_vnic_osd *osd = net_dev->priv;
  ci_boolean_t sent;

  if(osd->tx_skb != NULL) {
    ci_log("%s: Already busy!", __FUNCTION__);
    return NETDEV_TX_BUSY;
  }

  VNIC_VERB(ci_log("%s: Accepting packet %p", __FUNCTION__, skb));

  sent = ef_vnic_osd_xmit(osd, skb);
  if(CI_LIKELY(sent))
    return NETDEV_TX_OK;

  /* The packet was deferred: hold off further sends. */
  VNIC_VERB(ci_log("%s stopping queue", __FUNCTION__));
  netif_stop_queue(net_dev);
  return NETDEV_TX_OK;
}

/* net_device tx_timeout callback.  Currently does nothing: the code
 * that would schedule osd->reset_work is compiled out. */
static void ef_vnic_osd_tx_timeout(struct net_device *net_dev) {
#if 0 /* FIXME */
  ef_vnic_osd *osd = net_dev->priv;
  ci_log("%s() called", __FUNCTION__);
  schedule_work(&osd->reset_work);
#endif
}

/* Disabled multicast support, kept for reference only.
 * NOTE(review): the callee below is spelled "shv" whereas every other
 * SVH call in this file is spelled "svh" - confirm the name before
 * enabling this code. */
#if 0
/* FIXME: Multicast support. */
static void ef_vnic_osd_set_multicast_list(struct net_device *net_dev)
{
  ef_vnic_osd *osd = net_dev->priv;
  ef_vnic_shv_set_multicast(EF_VNIC_SVH_FROM_OSD(osd));
}
#endif

/* NAPI poll callback.
 *
 * Processes up to min(*budget, net_dev->quota) received packets via the
 * SVH, charging the work done to both.  Applies a pending MTU change
 * and schedules the RX refill work item when anything was done.
 *
 * Returns 0 when polling is finished (the device has been removed from
 * the poll list with netif_rx_complete()), or 1 when the allowance was
 * used up and there is more to do. */
static int ef_vnic_osd_poll(struct net_device *net_dev, int *budget)
{
  ef_vnic_osd *osd = net_dev->priv;
  int rx_allowed, rx_done;
  /* spin_lock_irqsave() saves the IRQ state into an unsigned long. */
  unsigned long flags;

  /* FIXME: Since we can't disable interrupts properly, we have to do
   * the check here to find out if interrupts are being disabled.
   * This is done inside the poll so that ef_vnic_osd_stop can wait
   * until the effect has been noticed. */
  if(CI_UNLIKELY(!osd->irq_enabled)) {
    netif_rx_complete(net_dev);
    return 0; /* Done */
  }

  rx_allowed = CI_MIN(*budget, net_dev->quota);
  rx_done = ef_vnic_svh_poll(EF_VNIC_SVH_FROM_OSD(osd), rx_allowed);
  *budget -= rx_done;
  net_dev->quota -= rx_done;

  osd->poll_call_count++;

  /* Update the MTU if necessary.
   * FIXME: Slow path
   */
  if(CI_UNLIKELY(osd->mtu_changed)) {
    ef_vnic_osd_update_mtu(osd);
    /* Make sure we update descriptors. */
    if(rx_done==0)
      rx_done = 1;
  }

  /* Replenish RX buffers if necessary. */
  if(CI_LIKELY(rx_done!=0)) {
    schedule_work(&osd->rx_refill_work);
  }

  if(CI_UNLIKELY(rx_done < rx_allowed)) {
    /* We've run out of things to process.  Re-arm the interrupt. */
    netif_rx_complete(net_dev);

    /* Enable interrupts in the SVH and check to make sure nothing has
     * arrived since we polled.
     *
     * FIXME: There may be a better scheme than taking a lock here.
     * In Xen, the suspend callback gets called in uni-processor mode,
     * so this code can't run in parallel with ef_vnic_osd_stop.  At
     * least this works and will not call the SVH after
     * ef_vnic_osd_stop has been called.
     */
    spin_lock_irqsave(&osd->stop_lock, flags);
    if(osd->irq_enabled) {
      ef_vnic_bec_enable_interrupts(EF_VNIC_BEC_FROM_OSD(osd));
      if(!ef_vnic_svh_enable_interrupts(EF_VNIC_SVH_FROM_OSD(osd))) {
        /* Something arrived in the meantime: go round again. */
        VNIC_VERB(ci_log("%s() rescheduling", __FUNCTION__));
        if(netif_rx_schedule_prep(net_dev)) {
          ef_vnic_bec_disable_interrupts(EF_VNIC_BEC_FROM_OSD(osd));
#if 0
          ef_vnic_svh_disable_interrupts(EF_VNIC_SVH_FROM_OSD(osd));
#endif
          __netif_rx_schedule(net_dev);
          osd->poll_reschedule_count++;
        }

      }
    } else {
      ci_log("%s() interrupt disabled", __FUNCTION__);
    }
    spin_unlock_irqrestore(&osd->stop_lock, flags);

    return 0; /* Done */
  }

  return 1; /* More to do. */
}

/*** VNIC functions ******************************************************/

/* Initialise the OSD state and create its network interface.
 *
 * The OSD structure is zeroed and set up in the stopped state (SUSPEND
 * and CLOSED reasons outstanding), an Ethernet net_device is allocated,
 * wired to this driver's callbacks and registered, and the sysfs
 * attributes are published.  The SUSPEND reason is then withdrawn so
 * that the interface can come up once it is opened.
 *
 * Failure to create the sysfs attributes is logged but does not fail
 * the probe.
 *
 * Returns CI_TRUE on success.  On failure to allocate or register the
 * net_device, ef_vnic_osd_remove() is called and CI_FALSE is
 * returned. */
ci_boolean_t ef_vnic_osd_probe(ef_vnic_osd *osd)
{
  struct net_device *net_dev;
  int err;

  ci_log("%s(%p) called", __FUNCTION__, osd);

  memset ( osd, 0, sizeof ( *osd ) );
  /* Make sure memset does the right thing with these types. */
  ci_assert_equal(osd->net_dev, NULL);
  ci_assert_equal(osd->rx_max_fill, 0);
  ci_assert_equal(osd->mtu_changed, CI_FALSE);

  osd->stop_reasons = ( OSD_STOP_REASON_SUSPEND |
                        OSD_STOP_REASON_CLOSED );
  spin_lock_init(&osd->stop_lock);

  sema_init(&osd->rx_fill_mutex, 1);
  osd->rx_max_fill = 150;
  osd->rx_min_fill = ~0;

  skb_queue_head_init(&osd->rx_skb_list);

  /* FIXME */
  EF_VNIC_INF_FROM_OSD(osd)->rx_buffer_length = 2048;
  EF_VNIC_INF_FROM_OSD(osd)->mtu = 1500;

  INIT_WORK(&osd->rx_refill_work, &ef_vnic_osd_rx_refill_work, osd);
  sema_init(&osd->unpost_sem, 0);
  INIT_WORK(&osd->reset_work, &ef_vnic_osd_reset_work, osd);
  sema_init(&osd->reset_sem, 1);

  net_dev = alloc_etherdev(0);
  ci_log("%s: osd=%p  net_dev=%p", __FILE__, osd, net_dev);
  if(net_dev == NULL) {
    ci_log("%s: alloc_etherdev failed", __FUNCTION__);
    goto error;
  }

  osd->net_dev = net_dev;

  net_dev->open               = &ef_vnic_osd_open;
  net_dev->stop               = &ef_vnic_osd_close;
  net_dev->do_ioctl           = &ef_vnic_osd_ioctl;
  net_dev->get_stats          = &ef_vnic_osd_get_stats;
  net_dev->hard_start_xmit    = &ef_vnic_osd_hard_start_xmit;
  net_dev->tx_timeout         = &ef_vnic_osd_tx_timeout;
#if 0
  /* FIXME: Multicast */
  net_dev->set_multicast_list = &ef_vnic_osd_set_multicast_list;
#endif
  net_dev->poll               = &ef_vnic_osd_poll;
  net_dev->weight             = 16;
  net_dev->priv               = osd;
  SET_MODULE_OWNER(net_dev);
  /* FIXME: SET_NETDEV_DEV(net_dev, pdev) */

  ef_vnic_svh_get_mac_addr(EF_VNIC_SVH_FROM_OSD(osd),
                           net_dev->dev_addr);

  err = register_netdev(net_dev);
  ci_log("%s: register_netdev(%p) returned %d", __FUNCTION__, net_dev, err);

  if(err < 0) {
    ci_log("%s: register_netdev failed: %d", __FUNCTION__, err);
    /* Don't unregister the netdev in this case. */
    free_netdev(net_dev);
    osd->net_dev = NULL;
    goto error;
  }

  /* The sysfs entries are diagnostics only, so failure to create them
   * is reported but is not fatal. */
  err = class_device_create_file(&net_dev->class_dev, &class_device_attr_refcnt);
  if(err < 0)
    ci_log("%s: class_device_create_file failed: %d", __FUNCTION__, err);

  /* Allow the interface to come up. */
  ef_vnic_osd_resume(osd);

  err = sysfs_create_group(&net_dev->class_dev.kobj, &accel_attrg);
  if(err < 0)
    ci_log("%s: sysfs_create_group failed: %d", __FUNCTION__, err);

  return CI_TRUE;

error:
  /* Something's gone wrong.  Remove the interface. */
  ef_vnic_osd_remove(osd);
  return CI_FALSE;
}

/* Tear down the interface created by ef_vnic_osd_probe().
 *
 * Stops the interface with the REMOVE reason, then - if a net_device
 * exists - detaches it, removes the "vmaccel" sysfs group, unregisters
 * and frees it.  Finally SVH interrupts are disabled.  This is also the
 * error path of ef_vnic_osd_probe(), in which case osd->net_dev is NULL
 * and the device teardown is skipped. */
void ef_vnic_osd_remove(ef_vnic_osd *osd)
{
  ci_log("%s(%p) called", __FUNCTION__, osd);

  /* Stop all activity. */
  ef_vnic_osd_stop(osd, OSD_STOP_REASON_REMOVE);

  /* Release the device. */
  if(osd->net_dev) {
    struct net_device *net_dev = osd->net_dev;

    /* This stops anyone from touching the device. */
    netif_device_detach(net_dev);

    /* The attribute group must go before the class device does. */
    sysfs_remove_group(&net_dev->class_dev.kobj, &accel_attrg);
    /* Bring the interface down and stop all callbacks from
     * running. */
    unregister_netdev(net_dev);
    net_dev->priv = NULL;

    /* Release our reference. */
    free_netdev(net_dev);
    osd->net_dev = NULL;
  }

  /* Halt interrupts. */
  ef_vnic_svh_disable_interrupts(EF_VNIC_SVH_FROM_OSD(osd));
}

/* Stop the interface with the SUSPEND stop reason. */
void ef_vnic_osd_suspend(ef_vnic_osd *osd)
{
  const unsigned int why = OSD_STOP_REASON_SUSPEND;

  ci_log("%s() called", __FUNCTION__);
  ef_vnic_osd_stop(osd, why);
}

/* Withdraw the SUSPEND stop reason; the interface restarts if no other
 * stop reasons remain. */
void ef_vnic_osd_resume(ef_vnic_osd *osd)
{
  const unsigned int why = OSD_STOP_REASON_SUSPEND;

  ci_log("%s() called", __FUNCTION__);
  ef_vnic_osd_start(osd, why);
}

/* Interrupt entry point for the VNIC.
 *
 * Counts the interrupt and, if the NAPI poll can be scheduled, disables
 * the back-end interrupt source, updates the per-interrupt event
 * statistics and schedules the poll.  If the poll cannot be scheduled
 * nothing is done beyond an optional verbose log message. */
void ef_vnic_osd_interrupt(ef_vnic_osd *osd)
{
  struct net_device *net_dev = osd->net_dev;
  ef_vnic_svh *svh;

  osd->irq_count++;

  if(net_dev==NULL) {
    ci_log("%s: Where did the net_dev for %p go?", __FUNCTION__, osd);
    return;
  }

  if(!netif_rx_schedule_prep(net_dev)) {
    ci_rmb();
    /* We arrive here when ef_vnic_osd_stop has called
     * netif_poll_disable.
     *
     * FIXME: I don't know why we get this message.  Interrupts are
     * disabled when we're on the poll list.
     */
    VNIC_VERB(ci_log("Couldn't schedule NAPI poll on %s, irq_enabled=%d",
                     net_dev->name, osd->irq_enabled));
    return;
  }

  svh = EF_VNIC_SVH_FROM_OSD(osd);

  /* Disable interrupt sources.  They will get enabled when polling
   * is complete.  Only the back-end source is disabled here.
   *
   * TBD: Perhaps this should be done in the back end so that it can
   * be cheaper in the case of card-generated interrupts. */
  ef_vnic_bec_disable_interrupts(EF_VNIC_BEC_FROM_OSD(osd));

  /* Fold the events seen since the last interrupt into the stats. */
  if(CI_UNLIKELY(svh->event_count_since_irq > svh->events_per_irq_max))
    svh->events_per_irq_max = svh->event_count_since_irq;
  if(svh->event_count_since_irq == 0)
    osd->useless_irq_count++;
  svh->event_count_since_irq = 0;

  osd->poll_schedule_count++;
  __netif_rx_schedule(net_dev);
}

/* Deliver a received packet of len bytes to the network stack.
 *
 * skb must be empty on entry (skb->len == 0); its length is set to len,
 * it is attributed to this VNIC's net_device, marked as not needing a
 * checksum check and handed to netif_receive_skb().  The rx_packets and
 * rx_bytes statistics are then updated. */
void ef_vnic_osd_rx_complete(ef_vnic_osd *osd,
                             ef_vnic_pktbuff *skb,
                             ef_vnic_size_t len)
{
  /* To test the tx slow path, received packets need to appear from the
   * tx slowpath net device; substitute
   * EF_VNIC_BEC_FROM_OSD(osd)->netdev here to achieve that. */
  struct net_device *net_dev = osd->net_dev;

  ci_assert_equal(skb->len, 0);
  skb_put(skb, len);

  skb->dev = net_dev;
  skb->protocol = eth_type_trans(skb, net_dev);
  /* FIXME: For arrivals from BEND. Need to fix for H/W */
  skb->ip_summed = CHECKSUM_UNNECESSARY;
  netif_receive_skb(skb);

  /* Update statistics. */
  osd->stats.rx_packets++;
  osd->stats.rx_bytes += len;
}

/* Handle completion of a transmitted packet.
 *
 * Accounts the packet in tx_packets/tx_bytes and frees it.  If a packet
 * was parked in osd->tx_skb because the SVH was busy, it is retried
 * now; when the retry is handled the TX queue is woken. */
void ef_vnic_osd_tx_complete(ef_vnic_osd *osd,
                             ef_vnic_pktbuff *skb)
{
  ef_vnic_pktbuff *parked;

  VNIC_VERB(ci_log("%s: Returning %p", __FUNCTION__, skb));

  osd->stats.tx_packets++;
  osd->stats.tx_bytes += skb->len;
  dev_kfree_skb_any(skb);

  /* Nothing further unless a packet is waiting in hand. */
  if(CI_LIKELY(osd->tx_skb == NULL))
    return;

  VNIC_VERB(ci_log("%s trying to send spare buffer", __FUNCTION__));
  parked = osd->tx_skb;
  osd->tx_skb = NULL;
  if(ef_vnic_osd_xmit(osd, parked)) {
    VNIC_VERB(ci_log("%s restarting tx", __FUNCTION__));
    netif_wake_queue(osd->net_dev);
  }
}

/* Dispose of a packet buffer that has been withdrawn without being
 * used.  A withdrawn transmit buffer is counted in tx_aborted_errors;
 * the buffer is freed in either case. */
void ef_vnic_osd_unposted(ef_vnic_osd *osd,
                          ef_vnic_pktbuff *skb,
                          ci_boolean_t is_tx)
{
  if(is_tx)
    osd->stats.tx_aborted_errors++;

  dev_kfree_skb_any(skb);
}

/* Signal that all buffers have been unposted by releasing
 * osd->unpost_sem, on which ef_vnic_osd_stop() calls down(). */
void ef_vnic_osd_unposted_all(ef_vnic_osd *osd)
{
  struct semaphore *done = &osd->unpost_sem;

  up(done);
}

/* Record the new link state and reflect it in the net_device carrier
 * state. */
void ef_vnic_osd_change_link_status(ef_vnic_osd *osd,
                                    ci_boolean_t is_up)
{
  EF_VNIC_INF_FROM_OSD(osd)->is_up = is_up;

  if(!is_up) {
    netif_carrier_off(osd->net_dev);
    return;
  }

  netif_carrier_on(osd->net_dev);
}

/* Record a new MTU and receive buffer length, and flag the change so
 * that the poll routine calls ef_vnic_osd_update_mtu().  The flag is
 * written after both values have been stored. */
void ef_vnic_osd_change_mtu(ef_vnic_osd *osd,
                            ef_vnic_size_t mtu,
                            ef_vnic_size_t rx_size)
{
  EF_VNIC_INF_FROM_OSD(osd)->rx_buffer_length = rx_size;
  EF_VNIC_INF_FROM_OSD(osd)->mtu = mtu;

  osd->mtu_changed = CI_TRUE;
}

/*
 * Local variables:
 *  c-basic-offset: 2
 *  c-indent-level: 2
 *  tab-width: 8
 * End:
 */
