/**************************************************************************\
*//*! \file svh_ef1.c Etherfabric semi-virtualised hardware functions

Copyright 2006 Solarflare Communications Inc,
               9501 Jeronimo Road, Suite 250,
               Irvine, CA 92618, USA

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License version 2 as published by the Free
Software Foundation, incorporated herein by reference.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*//*
\**************************************************************************/

#include <ci/driver/virtual/vnic.h>
#include <etherfabric/vi.h>
#include <etherfabric/tools.h>
#include "ef_vnic_bufs.h"
#include "ci/xen/ef_cuckoo_hash.h"


/* Tear down everything ef_vnic_ef1_ctor() set up for this SVH.
 *
 * Resources are released in the reverse of the order in which the
 * constructor acquired them (the same order as the constructor's own
 * failure-unwind path): event queue grant mapping, event queue register
 * mappings, RX id fifo and RX mappings, TX id fifo and TX mappings,
 * buffer state, and finally the local MAC hash table.
 *
 * Must only be called on an SVH whose constructor returned success;
 * nothing here checks for partially-constructed state.
 */
static
void ef_vnic_ef1_dtor(ef_vnic_svh *svh)
{
  /* FIXME: Private... */
  /* Undo the grant mapping of the event queue memory. */
  ef_hyperop_unmap_contig(EF_VNIC_BEC_FROM_SVH(svh)->dev,
                          svh->evq_mapping);
  /* Unmap the page-aligned bases, not the register pointers derived
   * from them. */
  iounmap(svh->evq_ptr_page);
  iounmap(svh->evq_timer_page);
  /* RX side: id fifo, then descriptor queue and doorbell mappings. */
  kfree(svh->vi.ep_rx_ids);
  iounmap(svh->vi.ep_dma_rx_q.dma_kva);
  iounmap(svh->vi.ep_dma_rx_q.doorbell_kva);
  /* TX side: id fifo, then descriptor queue and doorbell mappings. */
  kfree(svh->vi.ep_tx_ids);
  iounmap(svh->vi.ep_dma_tx_q.dma_kva);
  iounmap(svh->vi.ep_dma_tx_q.doorbell_kva);
  /* Release the buffer state allocated by ef_vnic_init_bufs(). */
  ef_vnic_fini_bufs(svh->bufs);

  /* Finished with this hash table */
  ef_cuckoo_hash_destroy(&svh->local_macs_table);
}

/* Construct the EF1 state of an SVH from a hardware description message.
 *
 * In order, this:
 *   - creates the local MAC hash table (seeded with the broadcast
 *     address) and its lock;
 *   - allocates the buffer state;
 *   - maps the TX doorbell and TX DMA queue pages and allocates the TX
 *     id fifo;
 *   - does the same for RX;
 *   - maps the pages holding the event queue timer register and the
 *     event queue pointer register;
 *   - initialises the software DMA queue state;
 *   - maps the granted event queue memory and fills in svh->evq.
 *
 * svh     SVH to initialise.
 * hw_msg  Hardware description; its type must be MSG_HWTYPE_EF1 and the
 *         resources.ef1 member supplies every address/id used here.
 *
 * Returns 0 on success.  On any failure everything acquired so far is
 * released in reverse order and -EIO is returned (allocation failures
 * included).
 */
static
int ef_vnic_ef1_ctor(ef_vnic_svh *svh, struct ef_msg_hw *hw_msg)
{
  void *evq_timer;
  void *evq_timer_page, *evq_ptr_page;
  char bcast_mac[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

  /* This SVH is being invoked because the hardware type matches, so
   * something is deeply wrong if this isn't an EF1. */
  ci_assert(hw_msg->type == MSG_HWTYPE_EF1);

  /* Create local mac table, initial size 8 */
  if(ef_cuckoo_hash_init(&svh->local_macs_table, 3)) {
    ci_log("failed to allocate local mac table");
    goto hashtable_fail;
  }
  /* Add the broadcast mac address to the table so that such things
   * are always punted onto slow path */
  ef_cuckoo_hash_add(&svh->local_macs_table, bcast_mac, 0, 1);
  /* And create the local macs table lock */
  spin_lock_init(&svh->local_macs_lock);

  /* Allocate buffer state */
  svh->bufs = ef_vnic_init_bufs();
  if(svh->bufs == NULL)
    goto bufs_fail;

  svh->vi.ep_state = &svh->ep_state;

  /* Set up the Tx DMA Q from the message.  The message carries page
   * numbers, hence the shift by PAGE_SHIFT to form an address. */
  svh->vi.ep_dma_tx_q.handle=0xdeadbeef;
  svh->vi.ep_dma_tx_q.doorbell_kva =
      ioremap_nocache(hw_msg->resources.ef1.txbell << PAGE_SHIFT, PAGE_SIZE);
  if(svh->vi.ep_dma_tx_q.doorbell_kva == NULL)
    goto tx_doorbell_fail;
  svh->vi.ep_dma_tx_q.dma_kva =
      ioremap_nocache(hw_msg->resources.ef1.txdmaq << PAGE_SHIFT, PAGE_SIZE);
  if(svh->vi.ep_dma_tx_q.dma_kva == NULL)
    goto tx_dma_fail;
  svh->vi.ep_dma_tx_q.dmaq =  hw_msg->resources.ef1.txid;
  /* Fifo header followed by EF_TX_ID_FIFO_SIZE id slots. */
  svh->vi.ep_tx_ids = kmalloc(sizeof(ef_vi_dma_id_fifo) +
      EF_TX_ID_FIFO_SIZE * sizeof(svh->vi.ep_tx_ids->fifo[0]), GFP_ATOMIC);
  if(svh->vi.ep_tx_ids == NULL)
    goto tx_ids_fail;
  ci_fifo2_init(svh->vi.ep_tx_ids, EF_TX_ID_FIFO_SIZE - 1);

  /* And the Rx DMA Q */
  svh->vi.ep_dma_rx_q.handle=0xdeadbeef;
  svh->vi.ep_dma_rx_q.doorbell_kva =
      ioremap_nocache(hw_msg->resources.ef1.rxbell << PAGE_SHIFT, PAGE_SIZE);
  if(svh->vi.ep_dma_rx_q.doorbell_kva == NULL)
    goto rx_doorbell_fail;
  svh->vi.ep_dma_rx_q.dma_kva =
      ioremap_nocache(hw_msg->resources.ef1.rxdmaq << PAGE_SHIFT, PAGE_SIZE);
  if(svh->vi.ep_dma_rx_q.dma_kva == NULL)
    goto rx_dma_fail;
  svh->vi.ep_dma_rx_q.dmaq =  hw_msg->resources.ef1.rxid;
  /* The RX id fifo is allocated with twice EF_RX_ID_FIFO_SIZE slots. */
  svh->vi.ep_rx_ids = kmalloc(sizeof(ef_vi_dma_id_fifo) +
      2 * EF_RX_ID_FIFO_SIZE * sizeof(svh->vi.ep_rx_ids->fifo[0]), GFP_ATOMIC);
  if(svh->vi.ep_rx_ids == NULL)
    goto rx_ids_fail;
  ci_fifo2_init(svh->vi.ep_rx_ids, 2 * EF_RX_ID_FIFO_SIZE - 1);

  /* And the hardware portion of the event Q.  The message gives the
   * full address of the timer register: map the page containing it and
   * then add the offset within the page back on.  The page base is kept
   * in the SVH so that it can be unmapped later. */
  evq_timer_page = ioremap_nocache(hw_msg->resources.ef1.evq_timer & PAGE_MASK, 1);
  if(evq_timer_page == NULL)
    goto evq_timer_fail;
  svh->evq_timer_page = evq_timer_page;
  evq_timer = (void*)((unsigned long)evq_timer_page |
                      (hw_msg->resources.ef1.evq_timer & (~PAGE_MASK)));

  /* And the eventq pointer register (same scheme, mapped read-only) */
  evq_ptr_page = ioremap_readonly(hw_msg->resources.ef1.evq_ptr & PAGE_MASK, 1);
  if(evq_ptr_page == NULL)
    goto evq_ptr_fail;
  svh->evq_ptr_page = evq_ptr_page;
  svh->evq_ptr = (void *)((unsigned long)evq_ptr_page |
                          (hw_msg->resources.ef1.evq_ptr & (~PAGE_MASK)));

  /* Full confession */
  ci_log("Mapped H/W"
         "  Tx DMAQ page %x -> %p\n"
         "  Rx DMAQ page %x -> %p\n"
         "  Tx bell page %x -> %p\n"
         "  Rx bell page %x -> %p\n"
         "  Tx DMA Q # %d\n"
         "  Rx DMA Q # %d\n"
         "  EVQ timer %x -> %p\n"
         "  EVQ ptr %x -> %p\n",
         hw_msg->resources.ef1.txdmaq, svh->vi.ep_dma_tx_q.dma_kva,
         hw_msg->resources.ef1.rxdmaq, svh->vi.ep_dma_rx_q.dma_kva,
         hw_msg->resources.ef1.txbell, svh->vi.ep_dma_tx_q.doorbell_kva,
         hw_msg->resources.ef1.rxbell, svh->vi.ep_dma_rx_q.doorbell_kva,
         hw_msg->resources.ef1.txid, hw_msg->resources.ef1.rxid,
         hw_msg->resources.ef1.evq_timer, evq_timer,
         hw_msg->resources.ef1.evq_ptr, svh->evq_ptr
        );

  efab_dma_tx_init_ip_q_state(&svh->txqs, EFAB_DMA_Q_DEFAULT_TX_SIZE);
  efab_dma_rx_init_q_state(&svh->rxqs, EFAB_DMA_Q_DEFAULT_RX_SIZE);
  ci_log("Will map evq %d pages", 1 << hw_msg->resources.ef1.evq_order);

  /* Now do something for the Event Q: map the 2^evq_order granted pages
   * as one contiguous region. */
  svh->evq.evq_base =
    ef_map_grants_contig(EF_VNIC_BEC_FROM_SVH(svh)->dev,
                         hw_msg->resources.ef1.evq_mem.gnts,
                         1 << hw_msg->resources.ef1.evq_order,
                         &svh->evq_mapping);
  if(svh->evq.evq_base == NULL)
    goto evq_fail;
  /* Mask covering the byte size of the queue (2^evq_order pages). */
  svh->evq.evq_mask =
    (1 << (hw_msg->resources.ef1.evq_order + PAGE_SHIFT)) - 1;
  svh->evq.evq_state = &svh->evq_state;
  svh->evq.evq_state->evq_ptr = 0;
  svh->evq.evq_timer_reg = evq_timer;
  svh->evq.evq_handle = 0xdeadbeef;

  /* Note which NIC port we transmit on. */
  svh->phys_port = hw_msg->resources.ef1.phys_port;

  svh->buffer_requests_left = EF_BUF_MAX_PAGES;
  return 0;

  /* Failure unwind: each label releases what was acquired immediately
   * before the step that jumps to it, then falls through to release
   * everything older. */
evq_fail:
  iounmap(svh->evq_ptr_page);
evq_ptr_fail:
  iounmap(svh->evq_timer_page);
evq_timer_fail:
  kfree(svh->vi.ep_rx_ids);
rx_ids_fail:
  iounmap(svh->vi.ep_dma_rx_q.dma_kva);
rx_dma_fail:
  iounmap(svh->vi.ep_dma_rx_q.doorbell_kva);
rx_doorbell_fail:
  kfree(svh->vi.ep_tx_ids);
tx_ids_fail:
  iounmap(svh->vi.ep_dma_tx_q.dma_kva);
tx_dma_fail:
  iounmap(svh->vi.ep_dma_tx_q.doorbell_kva);
tx_doorbell_fail:
  ef_vnic_fini_bufs(svh->bufs);
bufs_fail:
  ef_cuckoo_hash_destroy(&svh->local_macs_table);
hashtable_fail:
  return -EIO;
}

/* Queue one buffer for receive.
 *
 * A descriptor for the buffer is written into the next free slot of
 * svh->rx_batch_descs and the buffer id is appended to the RX id fifo.
 * The accumulated batch is pushed to the hardware either when it has
 * grown to EF_VNIC_RX_DESC_BATCH descriptors, or straight away while the
 * ring fill level (rx_dma_level) is still below EF_VNIC_RX_DESC_BATCH;
 * otherwise the descriptor waits in the batch for a later call.
 *
 * svh  SVH owning the RX queue.
 * id   Id of the buffer being posted.
 * buf  Descriptor of the buffer being posted.
 */
static
void ef_vnic_ef1_post_rx(ef_vnic_svh *svh, __u16 id,
                         ef_vnic_pkt_desc *buf)
{
  int n_batched = svh->rx_dma_batched;

  /* An RX buffer has no OS packet attached to it. */
  buf->u.pkt_os_buff = NULL;

  VNIC_VERB(ci_log("Posting buffer %d (0x%08x) for rx at index %d",
                   id, buf->pkt_buff_addr, n_batched));

  /* Build the descriptor in the batch array and remember the id. */
  efab_dma_rx_calc_buf(buf->pkt_buff_addr, &svh->rx_batch_descs[n_batched]);
  ci_fifo2_put(svh->vi.ep_rx_ids, id);

  n_batched++;
  svh->rx_dma_level++;

  /* Batch not yet full and the ring already holds at least a batch's
   * worth: leave the descriptor waiting. */
  if(n_batched < EF_VNIC_RX_DESC_BATCH &&
     svh->rx_dma_level >= EF_VNIC_RX_DESC_BATCH) {
    svh->rx_dma_batched = n_batched;
    return;
  }

  VNIC_VERB(ci_log("Flushing %d rx descriptors.", n_batched));

  /* Hand the whole batch to the hardware, then advance the queue state
   * by the number of descriptors pushed. */
  efab_dma_rx_push_n(&svh->vi.ep_dma_rx_q, &svh->rxqs,
                     svh->rx_batch_descs, n_batched);
  ci_fifo2_rd_adv(&svh->rxqs, n_batched);

  svh->rx_dma_batched = 0;
}

/* Recycle a completed buffer: repost it for receive while the RX ring is
 * below its target fill level (rx_dma_max), otherwise return it to the
 * buffer pool. */
ci_inline
void ef_vnic_ef1_post_rx_or_free(ef_vnic_svh *svh, __u16 id,
                                 ef_vnic_pkt_desc *buf)
{
  if(svh->rx_dma_level < svh->rx_dma_max) {
    VNIC_VERB(ci_log("Completed buffer %d is reposted", id));
    ef_vnic_ef1_post_rx(svh, id, buf);
    return;
  }

  VNIC_VERB(ci_log("Completed buffer %d is released", id));
  ef_vnic_buf_put(svh->bufs, id);
}

/* Top up the RX ring: keep taking buffers from the pool and posting them
 * until either the ring reaches rx_dma_max or the pool has no buffer to
 * give. */
ci_inline
void ef_vnic_ef1_rx_dma_fill(ef_vnic_svh *svh)
{
  ef_vnic_pkt_desc *fresh_buf;
  __u16 fresh_id;

  /* The level test comes first so that no buffer is taken from the pool
   * once the ring is full. */
  while(svh->rx_dma_level < svh->rx_dma_max &&
        (fresh_buf = ef_vnic_buf_get(svh->bufs, &fresh_id)) != NULL) {
    ef_vnic_ef1_post_rx(svh, fresh_id, fresh_buf);
  }
}

/* Add the buffers described by msg to the pool, recompute the RX ring
 * target fill level and post buffers up to that level.
 *
 * The target is 11/8 of the page count, clamped to the receive
 * capacity.  NOTE(review): the original author describes this as
 * "slightly more than 2/3", which fits if each page holds two buffers
 * (11/16 of the buffers) -- confirm against ef_vnic_bufs.
 */
void ef_vnic_ef1_add_bufs(ef_vnic_svh *svh, struct ef_msg *msg)
{
  /* Never post more than the receive capacity.
   * FIXME: capacity = ef_vi_receive_capacity(&svh->vi) */
  int capacity = EF_RX_ID_FIFO_SIZE-1;

  ef_vnic_add_bufs(svh->bufs, msg);

  svh->rx_dma_max = CI_MIN((svh->bufs->npages*11) >> 3, capacity);

  ef_vnic_ef1_rx_dma_fill(svh);
}


/* Transmit one packet on the fast path.
 *
 * The packet's fragments are copied into a buffer taken from the pool,
 * the buffer id is recorded in the TX id fifo and a descriptor for the
 * buffer is pushed to the hardware.  The OS packet is remembered in the
 * buffer descriptor so that it is available at completion time.
 *
 * svh      SVH to transmit on.
 * pktbuff  Packet to send.
 *
 * Returns EF_VNIC_STATUS_GOOD once the packet has been queued,
 * EF_VNIC_STATUS_FAIL if the packet is longer than half a page, or
 * EF_VNIC_STATUS_BUSY if no buffer is available.
 */
static
enum ef_vnic_post_status ef_vnic_ef1_tx_post(ef_vnic_svh *svh,
                                             ef_vnic_pktbuff *pktbuff)
{
  struct ef_vnic_pkt_desc *tx_buf;
  ef_vi_transmit_desc tx_desc;
  ef_vnic_size_t wire_len;
  ci_uint8 *copy_dst;
  __u16 tx_buf_id;
  int outstanding;

  /* FIXME: Reserve space in the DMA queue. */

  /* Anything longer than half a page is refused outright. */
  wire_len = ef_vnic_pktbuff_get_total_length(pktbuff);
  if(wire_len > PAGE_SIZE/2) {
    ci_log("%s() returning EF_VNIC_STATUS_FAIL", __FUNCTION__);
    return EF_VNIC_STATUS_FAIL;
  }

  tx_buf = ef_vnic_buf_get(svh->bufs, &tx_buf_id);
  if(tx_buf == NULL) {
    VNIC_VERB(ci_log("%s() returning EF_VNIC_STATUS_BUSY", __FUNCTION__));
    svh->fastpath_tx_busy++;
    return EF_VNIC_STATUS_BUSY;
  }

  /* Fast path TX statistics, including the high-water mark of packets
   * posted but not yet completed. */
  svh->fastpath_tx_pkts++;
  svh->fastpath_tx_bytes += wire_len;

  outstanding = svh->fastpath_tx_pkts - svh->fastpath_tx_completions;
  if(CI_UNLIKELY(outstanding > svh->fastpath_tx_pending_max))
    svh->fastpath_tx_pending_max = outstanding;

  /* Remember the OS packet for the completion handler, and record the
   * buffer id in the TX id fifo. */
  tx_buf->u.pkt_os_buff = pktbuff;
  ci_fifo2_put(svh->vi.ep_tx_ids, tx_buf_id);

  /* Gather every fragment into the buffer, back to back. */
  copy_dst = tx_buf->pkt_kva;
  EF_VNIC_PKTBUFF_FOR_EACH_FRAGMENT(pktbuff, idx, frag_data, frag_len, {
      VNIC_VERB(ci_log("*** Copying %d bytes to %p", frag_len, copy_dst));
      memcpy(copy_dst, frag_data, frag_len);
      copy_dst += frag_len;
    });

  VNIC_VERB(ci_log("%s: id %d pkt %p kva %p buff_addr 0x%08x", __FUNCTION__,
                   tx_buf_id, tx_buf, tx_buf->pkt_kva, tx_buf->pkt_buff_addr));

  /* Describe the buffer to the hardware, push the descriptor, then
   * advance the queue state by the one descriptor pushed. */
  efab_dma_tx_calc_buf(tx_buf->pkt_buff_addr, wire_len,
                       svh->phys_port, 0, &tx_desc);
  efab_dma_tx_push(&svh->vi.ep_dma_tx_q,
                   &svh->txqs, &tx_desc);
  ci_fifo2_rd_adv(&svh->txqs, 1);

  return EF_VNIC_STATUS_GOOD;
}

/* Placeholder: not implemented for EF1.  Only logs a FIXME carrying the
 * function name; the SVH is not touched. */
static
void ef_vnic_ef1_unpost_all(ef_vnic_svh *svh)
{
  (void)svh;  /* unused until this is implemented */

  ci_log("%s: FIXME", __FUNCTION__);
}

/* Low ten bits of the (little-endian) code word of the event pointed to
 * by _ev.  The argument is parenthesised so that any pointer expression
 * (e.g. "evs + 1") may be passed, not just a plain identifier. */
#define EVENT_NUM(_ev) (CI_BSWAP_LE32((_ev)->ev1002.code) & 0x3ff)

/* Process pending events on the SVH's event queue.
 *
 * Events are consumed in order until the queue is empty or the RX budget
 * has been used up:
 *   - RX event: the completed buffer is looked up, its contents are
 *     copied into a freshly allocated OS packet buffer which is passed
 *     up the stack, and the DMA buffer is reposted or released.  If no
 *     OS packet buffer can be allocated the packet is dropped but the
 *     DMA buffer is still recycled.
 *   - TX event: the OS packet saved at post time is completed and the
 *     DMA buffer is reposted for receive or released.
 *   - Error event: ci_fail().
 *   - Anything else is logged and skipped.
 *
 * svh         SVH to poll.
 * rx_packets  RX budget.  It is tested after each delivered packet, so
 *             at least one packet may be delivered whatever its value.
 *
 * Returns the number of RX packets passed up the stack.
 *
 * In debug builds (NDEBUG undefined) the RX/TX event sequence numbers
 * and the RX port are sanity-checked; the expected sequence numbers are
 * function-static and therefore shared by every SVH.
 */
static
int  ef_vnic_ef1_poll(ef_vnic_svh *svh,
                      int rx_packets)
{
  ef_event* ev;
  int rx_done = 0;
#ifndef NDEBUG
  static int last_rx = 0;
  static int last_tx = 0;
  int port;
#endif

  /* Start the iteration of the eventq */
  ef_eventq_iter_start(&svh->evq, &ev);

  /* Loop over each event */
  while( EF_IS_EVENT(*ev) ) {
    svh->event_count++;
    svh->event_count_since_irq++;
    VNIC_VERB(ci_log("%s: Event type 0x%x", __FUNCTION__, EF_EVENT_TYPE(*ev)));
    if( EF_EVENT_TYPE(*ev) == EF_EVENT_TYPE_RX ) {
      struct ef_vnic_bufinfo *bufinfo = svh->bufs;
      struct ef_vnic_pkt_desc *buf;
      ef_vnic_pktbuff *pktbuff;
      int id ;
      int len;

      VNIC_VERB(ci_log("Rx event."));
      /* Complete the receive operation, and get the request id of the
         buffer */
      id = ef_vi_receive_done(&svh->vi, *ev);
      ci_assert(id != (ci_uint16)-1);
      len = EF_EVENT_BYTES(*ev);
#ifndef NDEBUG
      port = EF_EVENT_RX_PORT(*ev); 
      if(port != svh->phys_port)
        ci_log("Packet received on wrong physical port.  Check wiring and xenbus config!");
#endif
      /* FIXME: csum = EF_EVENT_DMA_RX_CHECKSUM_OKAY(*ev); */
      /* FIXME: isok = EF_EVENT_DMA_RX_OKAY(*ev); */
      /* FIXME: if(!isok) falcon_recovery; */
      VNIC_VERB(ci_log("Rx ID %d complete, #%d: %d bytes", id,
                       EVENT_NUM(ev), len));
      if(id < 0 || id >= bufinfo->npages*2) {
        /* NOTE(review): rx_dma_level is left untouched here even though
         * a completion was consumed -- confirm whether it should be
         * decremented on this path too. */
        ci_log("Rx ID %d is invalid", id);
        /* Carry on round the loop if more events */
        ef_eventq_iter_next(&svh->evq, &ev);
        continue;
      }
#ifndef NDEBUG
      if (CI_UNLIKELY(EVENT_NUM(ev) != last_rx)) {
        ci_log("??? Expected rx event %d, got %d", last_rx,EVENT_NUM(ev));
      }
      last_rx = (last_rx + 1) & 0x3ff;
#endif
      /* Get our buffer descriptor */
      buf = ef_vnic_buf_find(bufinfo, id);
      pktbuff = ef_vnic_osd_new_rx_buffer(EF_VNIC_OSD_FROM_SVH(svh), len);
      if(pktbuff == NULL) {
        ci_log("%s: Couldn't get an Rx buffer.", __FUNCTION__);
        /* The packet is dropped, but its DMA buffer has still left the
         * ring.  Account for that and recycle the buffer exactly as the
         * success path does; otherwise rx_dma_level would creep upwards
         * on every allocation failure until nothing is reposted. */
        svh->rx_dma_level--;
        ef_vnic_ef1_post_rx_or_free(svh, (__u16)id, buf);
        /* Carry on round the loop if more events */
        ef_eventq_iter_next(&svh->evq, &ev);
        continue;
      }

      /* Optionally touch source and destination at the configured
       * strides before copying.  The loads go through volatile pointers
       * so they are really performed; the sum itself is discarded.  It
       * must start from a defined value: accumulating into an
       * uninitialised variable is undefined behaviour. */
      {
        int i;
        char c = 0;
        int pkt_stride = svh->rx_pkt_stride;
        int skb_stride = svh->rx_skb_stride;
        char *skb_start;

        skb_start = ef_vnic_pktbuff_get_header_data(pktbuff);
        if(pkt_stride) {
          for(i=0; i<len ;i+=pkt_stride) {
            c += ((volatile char*)(buf->pkt_kva))[i];
          }
        }
        if(skb_stride) {
          for(i=0; i<len ;i+=skb_stride) {
            c += ((volatile char*)(skb_start))[i];
          }
        }
        (void)c;
      }

      /* Copy the data to required end destination */
      memcpy(ef_vnic_pktbuff_get_header_data(pktbuff), buf->pkt_kva, len);

      /* Pass the packet up the network stack */
      ef_vnic_osd_rx_complete(EF_VNIC_OSD_FROM_SVH(svh), pktbuff, len);

      /* An RX buffer has been removed from the DMA ring. */
      svh->rx_dma_level--;

      /* Put the buffer back in the DMA queue. */
      ef_vnic_ef1_post_rx_or_free(svh, (__u16)id, buf);

      /* Track number of rx fastpath packets */
      svh->fastpath_rx_pkts++;
      svh->fastpath_rx_bytes += len;

      rx_done++;
      if(rx_done >= rx_packets) {
        /* Budget used up: step past this event before leaving so that it
         * is not seen again by the next poll. */
        ef_eventq_iter_next(&svh->evq, &ev);
        break;
      }

    } else if(EF_EVENT_TYPE(*ev) == EF_EVENT_TYPE_TX) {
      struct ef_vnic_pkt_desc *buf;
      /* Start from an invalid id so that an unbundle which yields
       * nothing is caught by the range check below. */
      int id = -1;

      /* FIXME: Falcon: if(ef_eventq_is_duplicate(*ev))... */

      /* Get the request ids for this tx completion event.  FIXME: Can
       * we support batching? */
      ef_vi_transmit_unbundle(&svh->vi, *ev, &id, 1);
      VNIC_VERB(ci_log("Tx ID %d complete, # %d.", id, EVENT_NUM(ev)));
#ifndef NDEBUG
      if (CI_UNLIKELY(EVENT_NUM(ev) != last_tx)) {
        ci_log("??? Expected tx event %d, got %d", last_tx,EVENT_NUM(ev));
      }
      last_tx = (last_tx + 1) & 0x3ff;
#endif
      /* Same bound as the RX path: never look up a buffer with an id
       * outside the pool. */
      if(id < 0 || id >= svh->bufs->npages*2) {
        ci_log("Tx ID %d is invalid", id);
        /* Carry on round the loop if more events */
        ef_eventq_iter_next(&svh->evq, &ev);
        continue;
      }
      buf = ef_vnic_buf_find(svh->bufs, id);
      svh->fastpath_tx_completions++;
      /* Internal callback to process tx completion */ 
      ef_vnic_osd_tx_complete(EF_VNIC_OSD_FROM_SVH(svh),
                              buf->u.pkt_os_buff);
      /* Recycle the buffer: reposted for receive if the RX ring is below
       * its target level, released to the pool otherwise. */
      ef_vnic_ef1_post_rx_or_free(svh, (__u16)id, buf);
    } else if( EF_EVENT_TYPE(*ev) == EF_EVENT_TYPE_ERROR ) {
      /* FIXME: Do some work. */
      ci_fail(("Error."));
    } else if( EF_EVENT_IGNORE(*ev) ) {
      /* Do nothing. */
      ci_log("Ignore.");
    } else {
      ci_log("Unexpected event " EF_EVENT_FMT, EF_EVENT_PRI_ARG(*ev));
    }
    /* Carry on round the loop if more events */
    ef_eventq_iter_next(&svh->evq, &ev);
  }

  return rx_done;
}

/* Placeholder: MTU changes are not handled for EF1 yet.  Only logs a
 * FIXME carrying the function name; the SVH is not touched. */
static
void ef_vnic_ef1_updated_mtu(ef_vnic_svh *svh)
{
  (void)svh;  /* unused until this is implemented */

  ci_log("%s: FIXME", __FUNCTION__);
}

/* Report whether an event is waiting at the current position of the
 * event queue.  The result is the value of EF_IS_EVENT() for that
 * entry; nothing is consumed. */
static
ci_boolean_t ef_vnic_ef1_check_interrupts(ef_vnic_svh *svh)
{
  ef_event* head_ev;

  ef_eventq_iter_start(&svh->evq, &head_ev);

  return EF_IS_EVENT(*head_ev);
}

/* Arm the event queue timer so that the next event raises an interrupt.
 *
 * Returns CI_TRUE if the timer was armed and the hardware and software
 * event queue positions agree.  Returns CI_FALSE if an event is already
 * pending (the timer is then not touched), or if the positions disagree
 * after arming, i.e. an event may have slipped in meanwhile.
 */
static
ci_boolean_t ef_vnic_ef1_enable_interrupts(ef_vnic_svh *svh)
{
  ef_event* head_ev;
  ci_uint32 hw_pos;
  ci_uint32 sw_pos;

  /* Do a quick check for an event. */
  ef_eventq_iter_start(&svh->evq, &head_ev);
  if(EF_IS_EVENT(*head_ev))
    return CI_FALSE;

  /* Request an immediate timeout when an event arrives. */
  ef_eventq_timer_zero(&svh->evq);

  /* Race breaker.  This isn't really sufficient, but seems to be the
     best we've got on EF1.  Theory of race is that the evq_ptr we
     read (below) is from the cache, and so out of date. Better
     solution would be to write to some unused event queue pointer to
     flush the cache. This would require domU write access to a page
     of unused event queues. */
  ef_eventq_timer_sync(&svh->evq);

  /* Find out where the hardware thinks the evq pointer is at.  The
   * register value is shifted left by 3 before masking -- presumably to
   * turn an event index into a byte offset; confirm against the
   * hardware documentation. */
  hw_pos = ((*(volatile ci_uint32 *)(svh->evq_ptr)) << 3) & svh->evq.evq_mask;
  sw_pos = svh->evq_state.evq_ptr & svh->evq.evq_mask;

  /* Compare that to the software's view of things; a mismatch means we
   * must not rely on the interrupt. */
  return (sw_pos == hw_pos) ? CI_TRUE : CI_FALSE;
}

/* Counterpart of ef_vnic_ef1_enable_interrupts(): clears the event queue
 * timer of this SVH. */
static
void ef_vnic_ef1_disable_interrupts(ef_vnic_svh *svh)
{
  ef_eventq_timer_clear(&svh->evq);
}


/* Operations table exporting the EF1 implementation.
 *
 * The initialisers are positional, so their order must match the member
 * order of ef_vnic_svh_functions (declared outside this file).  Apart
 * from this table, ef_vnic_ef1_add_bufs() is the only function in this
 * file with external linkage. */
ef_vnic_svh_functions ef_vnic_svh_ef1 = {
  &ef_vnic_ef1_ctor,               /* construct from a hardware message */
  &ef_vnic_ef1_dtor,               /* release everything the ctor set up */
  &ef_vnic_ef1_add_bufs,           /* add buffers and top up the RX ring */
  &ef_vnic_ef1_tx_post,            /* transmit one packet */
  &ef_vnic_ef1_unpost_all,         /* stub: logs FIXME */
  &ef_vnic_ef1_poll,               /* process event queue */
  &ef_vnic_ef1_updated_mtu,        /* stub: logs FIXME */
  &ef_vnic_ef1_check_interrupts,   /* is an event pending? */
  &ef_vnic_ef1_enable_interrupts,  /* arm event queue timer */
  &ef_vnic_ef1_disable_interrupts  /* clear event queue timer */
};
