This adds two kernel patches, scsi-target and IO_CMD_EPOLL_WAIT.
The former is a modified version of the scsi target infrastructure in
mainline.
The latter enables applications to handle AIO and non-AIO fds in the
same event loop. blktap uses a different patch, the AIO event queue
patch, for the same purpose. The IO_CMD_EPOLL_WAIT patch is expected to
be merged into mainline (the AIO event queue patch is not), so the scsi
target daemon uses it.
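
For reference, a userspace submission might look like the sketch below
(not part of this patch set; libaio has no helper for this opcode, so
the iocb is filled in by hand, and the field mapping follows
epoll-aio.patch: ki_buf is the epoll_event array, ki_nbytes the maximum
event count, ki_pos the relative timeout in milliseconds):

/*
 * Hypothetical example: wait for epoll readiness through the AIO
 * completion path, so AIO and non-AIO fds share one io_getevents() loop.
 * The events buffer must stay valid until the completion is reaped.
 */
#include <libaio.h>
#include <sys/epoll.h>
#include <string.h>

#ifndef IOCB_CMD_EPOLL_WAIT
#define IOCB_CMD_EPOLL_WAIT 9	/* value added by epoll-aio.patch */
#endif

static int submit_epoll_wait(io_context_t ctx, int epfd,
			     struct epoll_event *events, int maxevents,
			     int timeout_ms)
{
	struct iocb iocb;
	struct iocb *iocbs[1] = { &iocb };

	memset(&iocb, 0, sizeof(iocb));
	iocb.aio_fildes = epfd;			/* the epoll fd */
	iocb.aio_lio_opcode = IOCB_CMD_EPOLL_WAIT;
	iocb.u.c.buf = events;			/* epoll events land here */
	iocb.u.c.nbytes = maxevents;		/* kernel: iocb->ki_nbytes */
	iocb.u.c.offset = timeout_ms;		/* kernel: iocb->ki_pos */

	return io_submit(ctx, 1, iocbs);	/* result via io_getevents() */
}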
Signed-off-by: FUJITA Tomonori <fujita.tomonori@xxxxxxxxxxxxx>
diff -r 489f28021f26 -r 105d5d6b4e0d patches/linux-2.6.16.33/series
--- a/patches/linux-2.6.16.33/series Wed Dec 20 09:47:24 2006 +0000
+++ b/patches/linux-2.6.16.33/series Wed Jan 03 01:34:02 2007 +0900
@@ -33,3 +33,5 @@ x86-elfnote-as-preprocessor-macro.patch
x86-elfnote-as-preprocessor-macro.patch
vsnprintf.patch
kasprintf.patch
+epoll-aio.patch
+scsi-target.patch
diff -r 489f28021f26 -r 105d5d6b4e0d patches/linux-2.6.16.33/epoll-aio.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.33/epoll-aio.patch Wed Jan 03 01:34:02 2007 +0900
@@ -0,0 +1,241 @@
+diff --git a/fs/aio.c b/fs/aio.c
+index aec2b19..a1a4c2c 100644
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -29,6 +29,7 @@ #include <linux/aio.h>
+ #include <linux/highmem.h>
+ #include <linux/workqueue.h>
+ #include <linux/security.h>
++#include <linux/eventpoll.h>
+
+ #include <asm/kmap_types.h>
+ #include <asm/uaccess.h>
+@@ -812,6 +813,7 @@ static void aio_queue_work(struct kioctx
+ timeout = 1;
+ else
+ timeout = HZ/10;
++ timeout = 1;
+ queue_delayed_work(aio_wq, &ctx->wq, timeout);
+ }
+
+@@ -1435,6 +1437,9 @@ static ssize_t aio_setup_iocb(struct kio
+ if (file->f_op->aio_fsync)
+ kiocb->ki_retry = aio_fsync;
+ break;
++ case IOCB_CMD_EPOLL_WAIT:
++ kiocb->ki_retry = eventpoll_aio_wait;
++ break;
+ default:
+ dprintk("EINVAL: io_submit: no operation provided\n");
+ ret = -EINVAL;
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index 4284cd3..3aca096 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -34,6 +34,7 @@ #include <linux/wait.h>
+ #include <linux/eventpoll.h>
+ #include <linux/mount.h>
+ #include <linux/bitops.h>
++#include <linux/aio.h>
+ #include <asm/uaccess.h>
+ #include <asm/system.h>
+ #include <asm/io.h>
+@@ -706,6 +707,150 @@ eexit_1:
+ return error;
+ }
+
++static void eventpoll_aio_timer(unsigned long data)
++{
++ struct kiocb *iocb = (struct kiocb *)data;
++ struct timer_list *timer = (struct timer_list *)iocb->private;
++ struct file *file = iocb->ki_filp;
++ struct eventpoll *ep = (struct eventpoll *)file->private_data;
++ unsigned long flags;
++
++ (void)del_timer(timer);
++ write_lock_irqsave(&ep->lock, flags);
++ __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE);
++ write_unlock_irqrestore(&ep->lock, flags);
++}
++
++static int aio_epoll_cancel(struct kiocb *iocb, struct io_event *event)
++{
++ struct file *file = iocb->ki_filp;
++ struct eventpoll *ep = (struct eventpoll *)file->private_data;
++ int ret = -1;
++ struct list_head *list;
++ int seen = 0;
++
++ write_lock_irq(&ep->lock);
++
++ if (iocb->private)
++ del_timer((struct timer_list *)iocb->private);
++ /*
++ * We need to know whether the event was removed from the wait
++ * queue in order to return the proper status to the cancellation
++ * code.
++ */
++ list = &ep->wq.task_list;
++
++ do {
++ struct list_head *next;
++ if (list == &iocb->ki_wait.task_list)
++ seen++;
++ next = list->next;
++ if (next->prev != list) {
++ seen += 2;
++ break;
++ }
++ list = next;
++ } while (list != &ep->wq.task_list);
++
++ if (seen == 1) {
++ __remove_wait_queue(&ep->wq, &iocb->ki_wait);
++ ret = 0;
++ }
++ write_unlock_irq(&ep->lock);
++
++ if (ret == 0) {
++ /* successfully cancelled request */
++ kfree(iocb->private);
++ iocb->private = NULL;
++ /* drop the i/o reference */
++ aio_put_req(iocb);
++ } else
++ ret = -EAGAIN;
++
++ event->res = event->res2 = 0;
++ /* drop the cancel reference */
++ aio_put_req(iocb);
++
++ return ret;
++}
++
++/*
++ * iocb->ki_nbytes -- number of events
++ * iocb->ki_pos -- relative timeout in milliseconds
++ * iocb->private -- NULL first go; after that, it's set to the
++ * absolute timeout in jiffies.
++ */
++ssize_t eventpoll_aio_wait(struct kiocb *iocb)
++{
++ struct file *file = iocb->ki_filp;
++ ssize_t ret = -EINVAL;
++ int relative_ms;
++ unsigned long expires;
++ unsigned long now;
++ struct timer_list *timer;
++
++ if (!is_file_epoll(file) || iocb->ki_nbytes > MAX_EVENTS ||
++ iocb->ki_nbytes <= 0)
++ return -EINVAL;
++
++ if (!iocb->private) {
++ /*
++ * Note that we unconditionally allocate a timer, but we
++ * only use it if a timeout was specified. Otherwise, it
++ * is just a holder for the "infinite" value.
++ */
++ timer = kmalloc(sizeof(struct timer_list), GFP_KERNEL);
++ if (!timer)
++ return -ENOMEM;
++
++ if ((long)iocb->ki_pos < 0 || iocb->ki_pos >= EP_MAX_MSTIMEO)
++ expires = MAX_SCHEDULE_TIMEOUT;
++ else
++ expires = jiffies + msecs_to_jiffies(iocb->ki_pos);
++
++ init_timer(timer);
++ timer->function = eventpoll_aio_timer;
++ timer->data = (unsigned long)iocb;
++ timer->expires = expires;
++ } else {
++ timer = (struct timer_list *)iocb->private;
++ expires = timer->expires;
++ }
++
++ now = jiffies;
++ if (timer->expires == MAX_SCHEDULE_TIMEOUT)
++ relative_ms = EP_MAX_MSTIMEO;
++ else if (time_before(now, expires))
++ relative_ms = jiffies_to_msecs(expires - now);
++ else
++ relative_ms = 0;
++
++ iocb->ki_cancel = aio_epoll_cancel;
++ ret = ep_poll(file->private_data,
++ (struct epoll_event __user *)iocb->ki_buf,
++ iocb->ki_nbytes, relative_ms);
++
++ /*
++ * If a timeout was specified, ep_poll returned retry, and we have
++ * not yet registered a timer, go ahead and register one.
++ */
++ if (ret == -EIOCBRETRY && !iocb->private) {
++ iocb->private = timer;
++ add_timer(timer);
++ }
++
++ /*
++ * Did we get any events?
++ */
++ if (ret >= 0) {
++ iocb->ki_cancel = NULL;
++ (void)del_timer(timer);
++ kfree(timer);
++ iocb->private = NULL;
++ }
++
++ return ret;
++}
+
+ /*
+ * Creates the file descriptor to be used by the epoll interface.
+@@ -1518,6 +1663,12 @@ retry:
+
+ res = 0;
+ if (list_empty(&ep->rdllist)) {
++ if (in_aio() && jtimeout) {
++ __add_wait_queue(&ep->wq, current->io_wait);
++ res = -EIOCBRETRY;
++ write_unlock_irqrestore(&ep->lock, flags);
++ goto out;
++ }
+ /*
+ * We don't have any available event to return to the caller.
+ * We need to sleep here, and we will be wake up by
+@@ -1562,7 +1713,7 @@ retry:
+ if (!res && eavail &&
+ !(res = ep_events_transfer(ep, events, maxevents)) && jtimeout)
+ goto retry;
+-
++out:
+ return res;
+ }
+
+diff --git a/include/linux/aio_abi.h b/include/linux/aio_abi.h
+index 30fdcc8..bb67d5b 100644
+--- a/include/linux/aio_abi.h
++++ b/include/linux/aio_abi.h
+@@ -41,6 +41,7 @@ enum {
+ * IOCB_CMD_POLL = 5,
+ */
+ IOCB_CMD_NOOP = 6,
++ IOCB_CMD_EPOLL_WAIT = 9,
+ };
+
+ /* read() from /dev/aio returns these structures. */
+diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
+index 1289f0e..8b6678e 100644
+--- a/include/linux/eventpoll.h
++++ b/include/linux/eventpoll.h
+@@ -57,6 +57,9 @@ void eventpoll_init_file(struct file *fi
+ /* Used to release the epoll bits inside the "struct file" */
+ void eventpoll_release_file(struct file *file);
+
++/* Used to provide epoll_wait() to sys_io_submit() */
++ssize_t eventpoll_aio_wait(struct kiocb *iocb);
++
+ /*
+ * This is called from inside fs/file_table.c:__fput() to unlink files
+ * from the eventpoll interface. We need to have this facility to cleanup
diff -r 489f28021f26 -r 105d5d6b4e0d patches/linux-2.6.16.33/scsi-target.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.33/scsi-target.patch Wed Jan 03 01:34:02 2007 +0900
@@ -0,0 +1,3571 @@
+diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
+index 1ce88cf..c631d5a 100644
+--- a/block/ll_rw_blk.c
++++ b/block/ll_rw_blk.c
+@@ -2265,6 +2265,84 @@ void blk_insert_request(request_queue_t
+
+ EXPORT_SYMBOL(blk_insert_request);
+
++static int __blk_rq_unmap_user(struct bio *bio)
++{
++ int ret = 0;
++
++ if (bio) {
++ if (bio_flagged(bio, BIO_USER_MAPPED))
++ bio_unmap_user(bio);
++ else
++ ret = bio_uncopy_user(bio);
++ }
++
++ return ret;
++}
++
++static int __blk_rq_map_user(request_queue_t *q, struct request *rq,
++ void __user *ubuf, unsigned int len)
++{
++ unsigned long uaddr;
++ struct bio *bio, *orig_bio;
++ int reading, ret;
++
++ reading = rq_data_dir(rq) == READ;
++
++ /*
++ * if alignment requirement is satisfied, map in user pages for
++ * direct dma. else, set up kernel bounce buffers
++ */
++ uaddr = (unsigned long) ubuf;
++ if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
++ bio = bio_map_user(q, NULL, uaddr, len, reading);
++ else
++ bio = bio_copy_user(q, uaddr, len, reading);
++
++ if (IS_ERR(bio)) {
++ return PTR_ERR(bio);
++ }
++
++ orig_bio = bio;
++ blk_queue_bounce(q, &bio);
++ /*
++ * We link the bounce buffer in and could have to traverse it
++ * later so we have to get a ref to prevent it from being freed
++ */
++ bio_get(bio);
++
++ /*
++ * for most (all? don't know of any) queues we could
++ * skip grabbing the queue lock here. only drivers with
++ * funky private ->back_merge_fn() function could be
++ * problematic.
++ */
++ spin_lock_irq(q->queue_lock);
++ if (!rq->bio)
++ blk_rq_bio_prep(q, rq, bio);
++ else if (!q->back_merge_fn(q, rq, bio)) {
++ ret = -EINVAL;
++ spin_unlock_irq(q->queue_lock);
++ goto unmap_bio;
++ } else {
++ rq->biotail->bi_next = bio;
++ rq->biotail = bio;
++
++ rq->nr_sectors += bio_sectors(bio);
++ rq->hard_nr_sectors = rq->nr_sectors;
++ rq->data_len += bio->bi_size;
++ }
++ spin_unlock_irq(q->queue_lock);
++
++ return bio->bi_size;
++
++unmap_bio:
++ /* if it was bounced we must call the end io function */
++ bio_endio(bio, bio->bi_size, 0);
++ __blk_rq_unmap_user(orig_bio);
++ bio_put(bio);
++ return ret;
++}
++
+ /**
+ * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
+ * @q: request queue where request should be inserted
+@@ -2286,42 +2364,44 @@ EXPORT_SYMBOL(blk_insert_request);
+ * unmapping.
+ */
+ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,
+- unsigned int len)
++ unsigned long len)
+ {
+- unsigned long uaddr;
+- struct bio *bio;
+- int reading;
++ unsigned long bytes_read = 0;
++ int ret;
+
+ if (len > (q->max_hw_sectors << 9))
+ return -EINVAL;
+ if (!len || !ubuf)
+ return -EINVAL;
+
+- reading = rq_data_dir(rq) == READ;
++ while (bytes_read != len) {
++ unsigned long map_len, end, start;
+
+- /*
+- * if alignment requirement is satisfied, map in user pages for
+- * direct dma. else, set up kernel bounce buffers
+- */
+- uaddr = (unsigned long) ubuf;
+- if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
+- bio = bio_map_user(q, NULL, uaddr, len, reading);
+- else
+- bio = bio_copy_user(q, uaddr, len, reading);
++ map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE);
++ end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1)
++ >> PAGE_SHIFT;
++ start = (unsigned long)ubuf >> PAGE_SHIFT;
+
+- if (!IS_ERR(bio)) {
+- rq->bio = rq->biotail = bio;
+- blk_rq_bio_prep(q, rq, bio);
++ /*
++ * A bad offset could cause us to require BIO_MAX_PAGES + 1
++ * pages. If this happens we just lower the requested
++ * mapping len by a page so that we can fit
++ */
++ if (end - start > BIO_MAX_PAGES)
++ map_len -= PAGE_SIZE;
+
+- rq->buffer = rq->data = NULL;
+- rq->data_len = len;
+- return 0;
++ ret = __blk_rq_map_user(q, rq, ubuf, map_len);
++ if (ret < 0)
++ goto unmap_rq;
++ bytes_read += ret;
++ ubuf += ret;
+ }
+
+- /*
+- * bio is the err-ptr
+- */
+- return PTR_ERR(bio);
++ rq->buffer = rq->data = NULL;
++ return 0;
++unmap_rq:
++ blk_rq_unmap_user(rq);
++ return ret;
+ }
+
+ EXPORT_SYMBOL(blk_rq_map_user);
+@@ -2347,7 +2427,7 @@ EXPORT_SYMBOL(blk_rq_map_user);
+ * unmapping.
+ */
+ int blk_rq_map_user_iov(request_queue_t *q, struct request *rq,
+- struct sg_iovec *iov, int iov_count)
++ struct sg_iovec *iov, int iov_count, unsigned int len)
+ {
+ struct bio *bio;
+
+@@ -2361,10 +2441,15 @@ int blk_rq_map_user_iov(request_queue_t
+ if (IS_ERR(bio))
+ return PTR_ERR(bio);
+
+- rq->bio = rq->biotail = bio;
++ if (bio->bi_size != len) {
++ bio_endio(bio, bio->bi_size, 0);
++ bio_unmap_user(bio);
++ return -EINVAL;
++ }
++
++ bio_get(bio);
+ blk_rq_bio_prep(q, rq, bio);
+ rq->buffer = rq->data = NULL;
+- rq->data_len = bio->bi_size;
+ return 0;
+ }
+
+@@ -2372,23 +2457,26 @@ EXPORT_SYMBOL(blk_rq_map_user_iov);
+
+ /**
+ * blk_rq_unmap_user - unmap a request with user data
+- * @bio: bio to be unmapped
+- * @ulen: length of user buffer
++ * @rq: rq to be unmapped
+ *
+ * Description:
+- * Unmap a bio previously mapped by blk_rq_map_user().
++ * Unmap a rq previously mapped by blk_rq_map_user().
++ * rq->bio must be set to the original head of the request.
+ */
+-int blk_rq_unmap_user(struct bio *bio, unsigned int ulen)
++int blk_rq_unmap_user(struct request *rq)
+ {
+- int ret = 0;
++ struct bio *bio, *mapped_bio;
+
+- if (bio) {
+- if (bio_flagged(bio, BIO_USER_MAPPED))
+- bio_unmap_user(bio);
++ while ((bio = rq->bio)) {
++ if (bio_flagged(bio, BIO_BOUNCED))
++ mapped_bio = bio->bi_private;
+ else
+- ret = bio_uncopy_user(bio);
+- }
++ mapped_bio = bio;
+
++ __blk_rq_unmap_user(mapped_bio);
++ rq->bio = bio->bi_next;
++ bio_put(bio);
++ }
+ return 0;
+ }
+
+@@ -2419,11 +2507,8 @@ int blk_rq_map_kern(request_queue_t *q,
+ if (rq_data_dir(rq) == WRITE)
+ bio->bi_rw |= (1 << BIO_RW);
+
+- rq->bio = rq->biotail = bio;
+ blk_rq_bio_prep(q, rq, bio);
+-
+ rq->buffer = rq->data = NULL;
+- rq->data_len = len;
+ return 0;
+ }
+
+@@ -3429,6 +3514,7 @@ void blk_rq_bio_prep(request_queue_t *q,
+ rq->hard_cur_sectors = rq->current_nr_sectors;
+ rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
+ rq->buffer = bio_data(bio);
++ rq->data_len = bio->bi_size;
+
+ rq->bio = rq->biotail = bio;
+ }
+diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
+index 24f7af9..71f66dd 100644
+--- a/block/scsi_ioctl.c
++++ b/block/scsi_ioctl.c
+@@ -226,7 +226,6 @@ static int sg_io(struct file *file, requ
+ unsigned long start_time;
+ int writing = 0, ret = 0;
+ struct request *rq;
+- struct bio *bio;
+ char sense[SCSI_SENSE_BUFFERSIZE];
+ unsigned char cmd[BLK_MAX_CDB];
+
+@@ -258,30 +257,6 @@ static int sg_io(struct file *file, requ
+ if (!rq)
+ return -ENOMEM;
+
+- if (hdr->iovec_count) {
+- const int size = sizeof(struct sg_iovec) * hdr->iovec_count;
+- struct sg_iovec *iov;
+-
+- iov = kmalloc(size, GFP_KERNEL);
+- if (!iov) {
+- ret = -ENOMEM;
+- goto out;
+- }
+-
+- if (copy_from_user(iov, hdr->dxferp, size)) {
+- kfree(iov);
+- ret = -EFAULT;
+- goto out;
+- }
+-
+- ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count);
+- kfree(iov);
+- } else if (hdr->dxfer_len)
+- ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len);
+-
+- if (ret)
+- goto out;
+-
+ /*
+ * fill in request structure
+ */
+@@ -295,7 +270,6 @@ static int sg_io(struct file *file, requ
+ rq->sense_len = 0;
+
+ rq->flags |= REQ_BLOCK_PC;
+- bio = rq->bio;
+
+ /*
+ * bounce this after holding a reference to the original bio, it's
+@@ -310,6 +284,31 @@ static int sg_io(struct file *file, requ
+ if (!rq->timeout)
+ rq->timeout = BLK_DEFAULT_TIMEOUT;
+
++ if (hdr->iovec_count) {
++ const int size = sizeof(struct sg_iovec) * hdr->iovec_count;
++ struct sg_iovec *iov;
++
++ iov = kmalloc(size, GFP_KERNEL);
++ if (!iov) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ if (copy_from_user(iov, hdr->dxferp, size)) {
++ kfree(iov);
++ ret = -EFAULT;
++ goto out;
++ }
++
++ ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count,
++ hdr->dxfer_len);
++ kfree(iov);
++ } else if (hdr->dxfer_len)
++ ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len);
++
++ if (ret)
++ goto out;
++
+ rq->retries = 0;
+
+ start_time = jiffies;
+@@ -340,7 +339,7 @@ static int sg_io(struct file *file, requ
+ hdr->sb_len_wr = len;
+ }
+
+- if (blk_rq_unmap_user(bio, hdr->dxfer_len))
++ if (blk_rq_unmap_user(rq))
+ ret = -EFAULT;
+
+ /* may not have succeeded, but output values written to control
+diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
+index e866df0..3588e76 100644
+--- a/drivers/cdrom/cdrom.c
++++ b/drivers/cdrom/cdrom.c
+@@ -2133,16 +2133,14 @@ static int cdrom_read_cdda_bpc(struct cd
+ rq->timeout = 60 * HZ;
+ bio = rq->bio;
+
+- if (rq->bio)
+- blk_queue_bounce(q, &rq->bio);
+-
+ if (blk_execute_rq(q, cdi->disk, rq, 0)) {
+ struct request_sense *s = rq->sense;
+ ret = -EIO;
+ cdi->last_sense = s->sense_key;
+ }
+
+- if (blk_rq_unmap_user(bio, len))
++ rq->bio = bio;
++ if (blk_rq_unmap_user(rq))
+ ret = -EFAULT;
+
+ if (ret)
+diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
+index 3c606cf..c9a573b 100644
+--- a/drivers/scsi/Kconfig
++++ b/drivers/scsi/Kconfig
+@@ -27,6 +27,13 @@ config SCSI
+ However, do not compile this as a module if your root file system
+ (the one containing the directory /) is located on a SCSI device.
+
++config SCSI_TGT
++ tristate "SCSI target support"
++ depends on SCSI && EXPERIMENTAL
++ ---help---
++ If you want to use SCSI target mode drivers enable this option.
++ If you choose M, the module will be called scsi_tgt.
++
+ config SCSI_PROC_FS
+ bool "legacy /proc/scsi/ support"
+ depends on SCSI && PROC_FS
+@@ -890,6 +897,20 @@ config SCSI_IBMVSCSI
+ To compile this driver as a module, choose M here: the
+ module will be called ibmvscsic.
+
++config SCSI_IBMVSCSIS
++ tristate "IBM Virtual SCSI Server support"
++ depends on PPC_PSERIES && SCSI_TGT && SCSI_SRP
++ help
++ This is the SRP target driver for IBM pSeries virtual environments.
++
++ The userspace component needed to initialize the driver and
++ documentation can be found at:
++
++ http://stgt.berlios.de/
++
++ To compile this driver as a module, choose M here: the
++ module will be called ibmvstgt.
++
+ config SCSI_INITIO
+ tristate "Initio 9100U(W) support"
+ depends on PCI && SCSI
+@@ -1827,6 +1848,16 @@ config ZFCP
+ called zfcp. If you want to compile it as a module, say M here
+ and read <file:Documentation/modules.txt>.
+
++config SCSI_SRP
++ tristate "SCSI RDMA Protocol helper library"
++ depends on SCSI && PCI
++ select SCSI_TGT
++ help
++ If you wish to use SRP target drivers, say Y.
++
++ To compile this driver as a module, choose M here: the
++ module will be called libsrp.
++
+ endmenu
+
+ source "drivers/scsi/pcmcia/Kconfig"
+diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
+index 320e765..0779523 100644
+--- a/drivers/scsi/Makefile
++++ b/drivers/scsi/Makefile
+@@ -21,6 +21,7 @@ CFLAGS_seagate.o = -DARBITRATE -DPARIT
+ subdir-$(CONFIG_PCMCIA) += pcmcia
+
+ obj-$(CONFIG_SCSI) += scsi_mod.o
++obj-$(CONFIG_SCSI_TGT) += scsi_tgt.o
+
+ obj-$(CONFIG_RAID_ATTRS) += raid_class.o
+
+@@ -122,6 +123,7 @@ obj-$(CONFIG_SCSI_FCAL) += fcal.o
+ obj-$(CONFIG_SCSI_LASI700) += 53c700.o lasi700.o
+ obj-$(CONFIG_SCSI_NSP32) += nsp32.o
+ obj-$(CONFIG_SCSI_IPR) += ipr.o
++obj-$(CONFIG_SCSI_SRP) += libsrp.o
+ obj-$(CONFIG_SCSI_IBMVSCSI) += ibmvscsi/
+ obj-$(CONFIG_SCSI_SATA_AHCI) += libata.o ahci.o
+ obj-$(CONFIG_SCSI_SATA_SVW) += libata.o sata_svw.o
+@@ -155,6 +157,8 @@ scsi_mod-y += scsi.o hosts.o scsi_ioct
+ scsi_mod-$(CONFIG_SYSCTL) += scsi_sysctl.o
+ scsi_mod-$(CONFIG_SCSI_PROC_FS) += scsi_proc.o
+
++scsi_tgt-y += scsi_tgt_lib.o scsi_tgt_if.o
++
+ sd_mod-objs := sd.o
+ sr_mod-objs := sr.o sr_ioctl.o sr_vendor.o
+ ncr53c8xx-flags-$(CONFIG_SCSI_ZALON) \
+diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
+index 5881079..0b4c783 100644
+--- a/drivers/scsi/hosts.c
++++ b/drivers/scsi/hosts.c
+@@ -263,6 +263,10 @@ static void scsi_host_dev_release(struct
+ kthread_stop(shost->ehandler);
+ if (shost->work_q)
+ destroy_workqueue(shost->work_q);
++ if (shost->uspace_req_q) {
++ kfree(shost->uspace_req_q->queuedata);
++ scsi_free_queue(shost->uspace_req_q);
++ }
+
+ scsi_destroy_command_freelist(shost);
+ kfree(shost->shost_data);
+diff --git a/drivers/scsi/ibmvscsi/Makefile b/drivers/scsi/ibmvscsi/Makefile
+index 4e247b6..6ac0633 100644
+--- a/drivers/scsi/ibmvscsi/Makefile
++++ b/drivers/scsi/ibmvscsi/Makefile
+@@ -3,3 +3,5 @@ obj-$(CONFIG_SCSI_IBMVSCSI) += ibmvscsic
+ ibmvscsic-y += ibmvscsi.o
+ ibmvscsic-$(CONFIG_PPC_ISERIES) += iseries_vscsi.o
+ ibmvscsic-$(CONFIG_PPC_PSERIES) += rpa_vscsi.o
++
++obj-$(CONFIG_SCSI_IBMVSCSIS) += ibmvstgt.o
+diff --git a/drivers/scsi/ibmvscsi/ibmvstgt.c b/drivers/scsi/ibmvscsi/ibmvstgt.c
+new file mode 100644
+index 0000000..73fcfca
+--- /dev/null
++++ b/drivers/scsi/ibmvscsi/ibmvstgt.c
+@@ -0,0 +1,958 @@
++/*
++ * IBM eServer i/pSeries Virtual SCSI Target Driver
++ * Copyright (C) 2003-2005 Dave Boutcher (boutcher@xxxxxxxxxx) IBM Corp.
++ * Santiago Leon (santil@xxxxxxxxxx) IBM Corp.
++ * Linda Xie (lxie@xxxxxxxxxx) IBM Corp.
++ *
++ * Copyright (C) 2005-2006 FUJITA Tomonori <tomof@xxxxxxx>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
++ * USA
++ */
++#include <linux/interrupt.h>
++#include <linux/module.h>
++#include <scsi/scsi.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_tgt.h>
++#include <scsi/libsrp.h>
++#include <asm/hvcall.h>
++#include <asm/iommu.h>
++#include <asm/prom.h>
++#include <asm/vio.h>
++
++#include "ibmvscsi.h"
++
++#define INITIAL_SRP_LIMIT 16
++#define DEFAULT_MAX_SECTORS 512
++
++#define TGT_NAME "ibmvstgt"
++
++/*
++ * Hypervisor calls.
++ */
++#define h_copy_rdma(l, sa, sb, da, db) \
++ plpar_hcall_norets(H_COPY_RDMA, l, sa, sb, da, db)
++#define h_send_crq(ua, l, h) \
++ plpar_hcall_norets(H_SEND_CRQ, ua, l, h)
++#define h_reg_crq(ua, tok, sz)\
++ plpar_hcall_norets(H_REG_CRQ, ua, tok, sz);
++#define h_free_crq(ua) \
++ plpar_hcall_norets(H_FREE_CRQ, ua);
++
++/* tmp - will replace with SCSI logging stuff */
++#define eprintk(fmt, args...) \
++do { \
++ printk("%s(%d) " fmt, __FUNCTION__, __LINE__, ##args); \
++} while (0)
++/* #define dprintk eprintk */
++#define dprintk(fmt, args...)
++
++struct vio_port {
++ struct vio_dev *dma_dev;
++
++ struct crq_queue crq_queue;
++ struct work_struct crq_work;
++
++ unsigned long liobn;
++ unsigned long riobn;
++};
++
++static struct workqueue_struct *vtgtd;
++
++/*
++ * These are fixed for the system and come from the Open Firmware device tree.
++ * We just store them here to save getting them every time.
++ */
++static char system_id[64] = "";
++static char partition_name[97] = "UNKNOWN";
++static unsigned int partition_number = -1;
++
++static struct vio_port *target_to_port(struct srp_target *target)
++{
++ return (struct vio_port *) target->ldata;
++}
++
++static inline union viosrp_iu *vio_iu(struct iu_entry *iue)
++{
++ return (union viosrp_iu *) (iue->sbuf->buf);
++}
++
++static int send_iu(struct iu_entry *iue, uint64_t length, uint8_t format)
++{
++ struct srp_target *target = iue->target;
++ struct vio_port *vport = target_to_port(target);
++ long rc, rc1;
++ union {
++ struct viosrp_crq cooked;
++ uint64_t raw[2];
++ } crq;
++
++ /* First copy the SRP */
++ rc = h_copy_rdma(length, vport->liobn, iue->sbuf->dma,
++ vport->riobn, iue->remote_token);
++
++ if (rc)
++ eprintk("Error %ld transferring data\n", rc);
++
++ crq.cooked.valid = 0x80;
++ crq.cooked.format = format;
++ crq.cooked.reserved = 0x00;
++ crq.cooked.timeout = 0x00;
++ crq.cooked.IU_length = length;
++ crq.cooked.IU_data_ptr = vio_iu(iue)->srp.rsp.tag;
++
++ if (rc == 0)
++ crq.cooked.status = 0x99; /* Just needs to be non-zero */
++ else
++ crq.cooked.status = 0x00;
++
++ rc1 = h_send_crq(vport->dma_dev->unit_address, crq.raw[0], crq.raw[1]);
++
++ if (rc1) {
++ eprintk("%ld sending response\n", rc1);
++ return rc1;
++ }
++
++ return rc;
++}
++
++#define SRP_RSP_SENSE_DATA_LEN 18
++
++static int send_rsp(struct iu_entry *iue, struct scsi_cmnd *sc,
++ unsigned char status, unsigned char asc)
++{
++ union viosrp_iu *iu = vio_iu(iue);
++ uint64_t tag = iu->srp.rsp.tag;
++
++ /* If the linked bit is on and status is good */
++ if (test_bit(V_LINKED, &iue->flags) && (status == NO_SENSE))
++ status = 0x10;
++
++ memset(iu, 0, sizeof(struct srp_rsp));
++ iu->srp.rsp.opcode = SRP_RSP;
++ iu->srp.rsp.req_lim_delta = 1;
++ iu->srp.rsp.tag = tag;
++
++ if (test_bit(V_DIOVER, &iue->flags))
++ iu->srp.rsp.flags |= SRP_RSP_FLAG_DIOVER;
++
++ iu->srp.rsp.data_in_res_cnt = 0;
++ iu->srp.rsp.data_out_res_cnt = 0;
++
++ iu->srp.rsp.flags &= ~SRP_RSP_FLAG_RSPVALID;
++
++ iu->srp.rsp.resp_data_len = 0;
++ iu->srp.rsp.status = status;
++ if (status) {
++ uint8_t *sense = iu->srp.rsp.data;
++
++ if (sc) {
++ iu->srp.rsp.flags |= SRP_RSP_FLAG_SNSVALID;
++ iu->srp.rsp.sense_data_len = SCSI_SENSE_BUFFERSIZE;
++ memcpy(sense, sc->sense_buffer, SCSI_SENSE_BUFFERSIZE);
++ } else {
++ iu->srp.rsp.status = SAM_STAT_CHECK_CONDITION;
++ iu->srp.rsp.flags |= SRP_RSP_FLAG_SNSVALID;
++ iu->srp.rsp.sense_data_len = SRP_RSP_SENSE_DATA_LEN;
++
++ /* Valid bit and 'current errors' */
++ sense[0] = (0x1 << 7 | 0x70);
++ /* Sense key */
++ sense[2] = status;
++ /* Additional sense length */
++ sense[7] = 0xa; /* 10 bytes */
++ /* Additional sense code */
++ sense[12] = asc;
++ }
++ }
++
++ send_iu(iue, sizeof(iu->srp.rsp) + SRP_RSP_SENSE_DATA_LEN,
++ VIOSRP_SRP_FORMAT);
++
++ return 0;
++}
++
++static void handle_cmd_queue(struct srp_target *target)
++{
++ struct Scsi_Host *shost = target->shost;
++ struct iu_entry *iue;
++ struct srp_cmd *cmd;
++ unsigned long flags;
++ int err;
++
++retry:
++ spin_lock_irqsave(&target->lock, flags);
++
++ list_for_each_entry(iue, &target->cmd_queue, ilist) {
++ if (!test_and_set_bit(V_FLYING, &iue->flags)) {
++ spin_unlock_irqrestore(&target->lock, flags);
++ cmd = iue->sbuf->buf;
++ err = srp_cmd_queue(shost, cmd, iue, 0);
++ if (err) {
++ eprintk("cannot queue cmd %p %d\n", cmd, err);
++ srp_iu_put(iue);
++ }
++ goto retry;
++ }
++ }
++
++ spin_unlock_irqrestore(&target->lock, flags);
++}
++
+static int ibmvstgt_rdma(struct scsi_cmnd *sc, struct scatterlist *sg, int nsg,
++ struct srp_direct_buf *md, int nmd,
++ enum dma_data_direction dir, unsigned int rest)
++{
++ struct iu_entry *iue = (struct iu_entry *) sc->SCp.ptr;
++ struct srp_target *target = iue->target;
++ struct vio_port *vport = target_to_port(target);
++ dma_addr_t token;
++ long err;
++ unsigned int done = 0;
++ int i, sidx, soff;
++
++ sidx = soff = 0;
++ token = sg_dma_address(sg + sidx);
++
++ for (i = 0; i < nmd && rest; i++) {
++ unsigned int mdone, mlen;
++
++ mlen = min(rest, md[i].len);
++ for (mdone = 0; mlen;) {
++ int slen = min(sg_dma_len(sg + sidx) - soff, mlen);
++
++ if (dir == DMA_TO_DEVICE)
++ err = h_copy_rdma(slen,
++ vport->riobn,
++ md[i].va + mdone,
++ vport->liobn,
++ token + soff);
++ else
++ err = h_copy_rdma(slen,
++ vport->liobn,
++ token + soff,
++ vport->riobn,
++ md[i].va + mdone);
++
++ if (err != H_SUCCESS) {
++ eprintk("rdma error %d %d\n", dir, slen);
++ goto out;
++ }
++
++ mlen -= slen;
++ mdone += slen;
++ soff += slen;
++ done += slen;
++
++ if (soff == sg_dma_len(sg + sidx)) {
++ sidx++;
++ soff = 0;
++ token = sg_dma_address(sg + sidx);
++
++ if (sidx > nsg) {
++ eprintk("out of sg %p %d %d\n",
++ iue, sidx, nsg);
++ goto out;
++ }
++ }
++ };
++
++ rest -= mlen;
++ }
++out:
++
++ return 0;
++}
++
++static int ibmvstgt_transfer_data(struct scsi_cmnd *sc,
++ void (*done)(struct scsi_cmnd *))
++{
++ struct iu_entry *iue = (struct iu_entry *) sc->SCp.ptr;
++ int err;
++
++ err = srp_transfer_data(sc, &vio_iu(iue)->srp.cmd, ibmvstgt_rdma, 1, 1);
++
++ done(sc);
++
++ return err;
++}
++
++static int ibmvstgt_cmd_done(struct scsi_cmnd *sc,
++ void (*done)(struct scsi_cmnd *))
++{
++ unsigned long flags;
++ struct iu_entry *iue = (struct iu_entry *) sc->SCp.ptr;
++ struct srp_target *target = iue->target;
++
++ dprintk("%p %p %x\n", iue, target, vio_iu(iue)->srp.cmd.cdb[0]);
++
++ spin_lock_irqsave(&target->lock, flags);
++ list_del(&iue->ilist);
++ spin_unlock_irqrestore(&target->lock, flags);
++
++ if (sc->result != SAM_STAT_GOOD) {
++ eprintk("operation failed %p %d %x\n",
++ iue, sc->result, vio_iu(iue)->srp.cmd.cdb[0]);
++ send_rsp(iue, sc, HARDWARE_ERROR, 0x00);
++ } else
++ send_rsp(iue, sc, NO_SENSE, 0x00);
++
++ done(sc);
++ srp_iu_put(iue);
++ return 0;
++}
++
++int send_adapter_info(struct iu_entry *iue,
++ dma_addr_t remote_buffer, uint16_t length)
++{
++ struct srp_target *target = iue->target;
++ struct vio_port *vport = target_to_port(target);
++ struct Scsi_Host *shost = target->shost;
++ dma_addr_t data_token;
++ struct mad_adapter_info_data *info;
++ int err;
++
++ info = dma_alloc_coherent(target->dev, sizeof(*info), &data_token,
++ GFP_KERNEL);
++ if (!info) {
++ eprintk("bad dma_alloc_coherent %p\n", target);
++ return 1;
++ }
++
++ /* Get remote info */
++ err = h_copy_rdma(sizeof(*info), vport->riobn, remote_buffer,
++ vport->liobn, data_token);
++ if (err == H_SUCCESS) {
++ dprintk("Client connect: %s (%d)\n",
++ info->partition_name, info->partition_number);
++ }
++
++ memset(info, 0, sizeof(*info));
++
++ strcpy(info->srp_version, "16.a");
++ strncpy(info->partition_name, partition_name,
++ sizeof(info->partition_name));
++ info->partition_number = partition_number;
++ info->mad_version = 1;
++ info->os_type = 2;
++ info->port_max_txu[0] = shost->hostt->max_sectors << 9;
++
++ /* Send our info to remote */
++ err = h_copy_rdma(sizeof(*info), vport->liobn, data_token,
++ vport->riobn, remote_buffer);
++
++ dma_free_coherent(target->dev, sizeof(*info), info, data_token);
++
++ if (err != H_SUCCESS) {
++ eprintk("Error sending adapter info %d\n", err);
++ return 1;
++ }
++
++ return 0;
++}
++
++static void process_login(struct iu_entry *iue)
++{
++ union viosrp_iu *iu = vio_iu(iue);
++ struct srp_login_rsp *rsp = &iu->srp.login_rsp;
++ uint64_t tag = iu->srp.rsp.tag;
++
++ /* TODO handle case that requested size is wrong and
++ * buffer format is wrong
++ */
++ memset(iu, 0, sizeof(struct srp_login_rsp));
++ rsp->opcode = SRP_LOGIN_RSP;
++ rsp->req_lim_delta = INITIAL_SRP_LIMIT;
++ rsp->tag = tag;
++ rsp->max_it_iu_len = sizeof(union srp_iu);
++ rsp->max_ti_iu_len = sizeof(union srp_iu);
++ /* direct and indirect */
++ rsp->buf_fmt = SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT;
++
++ send_iu(iue, sizeof(*rsp), VIOSRP_SRP_FORMAT);
++}
++
++static inline void queue_cmd(struct iu_entry *iue)
++{
++ struct srp_target *target = iue->target;
++ unsigned long flags;
++
++ spin_lock_irqsave(&target->lock, flags);
++ list_add_tail(&iue->ilist, &target->cmd_queue);
++ spin_unlock_irqrestore(&target->lock, flags);
++}
++
++static int process_tsk_mgmt(struct iu_entry *iue)
++{
++ union viosrp_iu *iu = vio_iu(iue);
++ int fn;
++
++ dprintk("%p %u\n", iue, iu->srp.tsk_mgmt.tsk_mgmt_func);
++
++ switch (iu->srp.tsk_mgmt.tsk_mgmt_func) {
++ case SRP_TSK_ABORT_TASK:
++ fn = ABORT_TASK;
++ break;
++ case SRP_TSK_ABORT_TASK_SET:
++ fn = ABORT_TASK_SET;
++ break;
++ case SRP_TSK_CLEAR_TASK_SET:
++ fn = CLEAR_TASK_SET;
++ break;
++ case SRP_TSK_LUN_RESET:
++ fn = LOGICAL_UNIT_RESET;
++ break;
++ case SRP_TSK_CLEAR_ACA:
++ fn = CLEAR_ACA;
++ break;
++ default:
++ fn = 0;
++ }
++ if (fn)
++ scsi_tgt_tsk_mgmt_request(iue->target->shost, fn,
++ iu->srp.tsk_mgmt.task_tag,
++ (struct scsi_lun *) &iu->srp.tsk_mgmt.lun,
++ iue);
++ else
++ send_rsp(iue, NULL, ILLEGAL_REQUEST, 0x20);
++
++ return !fn;
++}
++
++static int process_mad_iu(struct iu_entry *iue)
++{
++ union viosrp_iu *iu = vio_iu(iue);
++ struct viosrp_adapter_info *info;
++ struct viosrp_host_config *conf;
++
++ switch (iu->mad.empty_iu.common.type) {
++ case VIOSRP_EMPTY_IU_TYPE:
++ eprintk("%s\n", "Unsupported EMPTY MAD IU");
++ break;
++ case VIOSRP_ERROR_LOG_TYPE:
++ eprintk("%s\n", "Unsupported ERROR LOG MAD IU");
++ iu->mad.error_log.common.status = 1;
++ send_iu(iue, sizeof(iu->mad.error_log), VIOSRP_MAD_FORMAT);
++ break;
++ case VIOSRP_ADAPTER_INFO_TYPE:
++ info = &iu->mad.adapter_info;
++ info->common.status = send_adapter_info(iue, info->buffer,
++ info->common.length);
++ send_iu(iue, sizeof(*info), VIOSRP_MAD_FORMAT);
++ break;
++ case VIOSRP_HOST_CONFIG_TYPE:
++ conf = &iu->mad.host_config;
++ conf->common.status = 1;
++ send_iu(iue, sizeof(*conf), VIOSRP_MAD_FORMAT);
++ break;
++ default:
++ eprintk("Unknown type %u\n", iu->srp.rsp.opcode);
++ }
++
++ return 1;
++}
++
++static int process_srp_iu(struct iu_entry *iue)
++{
++ union viosrp_iu *iu = vio_iu(iue);
++ int done = 1;
++ u8 opcode = iu->srp.rsp.opcode;
++
++ switch (opcode) {
++ case SRP_LOGIN_REQ:
++ process_login(iue);
++ break;
++ case SRP_TSK_MGMT:
++ done = process_tsk_mgmt(iue);
++ break;
++ case SRP_CMD:
++ queue_cmd(iue);
++ done = 0;
++ break;
++ case SRP_LOGIN_RSP:
++ case SRP_I_LOGOUT:
++ case SRP_T_LOGOUT:
++ case SRP_RSP:
++ case SRP_CRED_REQ:
++ case SRP_CRED_RSP:
++ case SRP_AER_REQ:
++ case SRP_AER_RSP:
++ eprintk("Unsupported type %u\n", opcode);
++ break;
++ default:
++ eprintk("Unknown type %u\n", opcode);
++ }
++
++ return done;
++}
++
++static void process_iu(struct viosrp_crq *crq, struct srp_target *target)
++{
++ struct vio_port *vport = target_to_port(target);
++ struct iu_entry *iue;
++ long err, done;
++
++ iue = srp_iu_get(target);
++ if (!iue) {
++ eprintk("Error getting IU from pool, %p\n", target);
++ return;
++ }
++
++ iue->remote_token = crq->IU_data_ptr;
++
++ err = h_copy_rdma(crq->IU_length, vport->riobn,
++ iue->remote_token, vport->liobn, iue->sbuf->dma);
++
++ if (err != H_SUCCESS) {
++ eprintk("%ld transferring data error %p\n", err, iue);
++ done = 1;
++ goto out;
++ }
++
++ if (crq->format == VIOSRP_MAD_FORMAT)
++ done = process_mad_iu(iue);
++ else
++ done = process_srp_iu(iue);
++out:
++ if (done)
++ srp_iu_put(iue);
++}
++
++static irqreturn_t ibmvstgt_interrupt(int irq, void *data)
++{
++ struct srp_target *target = (struct srp_target *) data;
++ struct vio_port *vport = target_to_port(target);
++
++ vio_disable_interrupts(vport->dma_dev);
++ queue_work(vtgtd, &vport->crq_work);
++
++ return IRQ_HANDLED;
++}
++
+static int crq_queue_create(struct crq_queue *queue, struct srp_target *target)
++{
++ int err;
++ struct vio_port *vport = target_to_port(target);
++
++ queue->msgs = (struct viosrp_crq *) get_zeroed_page(GFP_KERNEL);
++ if (!queue->msgs)
++ goto malloc_failed;
++ queue->size = PAGE_SIZE / sizeof(*queue->msgs);
++
++ queue->msg_token = dma_map_single(target->dev, queue->msgs,
++ queue->size * sizeof(*queue->msgs),
++ DMA_BIDIRECTIONAL);
++
++ if (dma_mapping_error(queue->msg_token))
++ goto map_failed;
++
++ err = h_reg_crq(vport->dma_dev->unit_address, queue->msg_token,
++ PAGE_SIZE);
++
++ /* If the adapter was left active for some reason (like kexec)
++ * try freeing and re-registering
++ */
++ if (err == H_RESOURCE) {
++ do {
++ err = h_free_crq(vport->dma_dev->unit_address);
++ } while (err == H_BUSY || H_IS_LONG_BUSY(err));
++
++ err = h_reg_crq(vport->dma_dev->unit_address, queue->msg_token,
++ PAGE_SIZE);
++ }
++
++ if (err != H_SUCCESS && err != 2) {
++ eprintk("Error 0x%x opening virtual adapter\n", err);
++ goto reg_crq_failed;
++ }
++
++ err = request_irq(vport->dma_dev->irq, &ibmvstgt_interrupt,
++ SA_INTERRUPT, "ibmvstgt", target);
++ if (err)
++ goto req_irq_failed;
++
++ vio_enable_interrupts(vport->dma_dev);
++
++ h_send_crq(vport->dma_dev->unit_address, 0xC001000000000000, 0);
++
++ queue->cur = 0;
++ spin_lock_init(&queue->lock);
++
++ return 0;
++
++req_irq_failed:
++ do {
++ err = h_free_crq(vport->dma_dev->unit_address);
++ } while (err == H_BUSY || H_IS_LONG_BUSY(err));
++
++reg_crq_failed:
++ dma_unmap_single(target->dev, queue->msg_token,
++ queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL);
++map_failed:
++ free_page((unsigned long) queue->msgs);
++
++malloc_failed:
++ return -ENOMEM;
++}
++
++static void crq_queue_destroy(struct srp_target *target)
++{
++ struct vio_port *vport = target_to_port(target);
++ struct crq_queue *queue = &vport->crq_queue;
++ int err;
++
++ free_irq(vport->dma_dev->irq, target);
++ do {
++ err = h_free_crq(vport->dma_dev->unit_address);
++ } while (err == H_BUSY || H_IS_LONG_BUSY(err));
++
++ dma_unmap_single(target->dev, queue->msg_token,
++ queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL);
++
++ free_page((unsigned long) queue->msgs);
++}
++
+static void process_crq(struct viosrp_crq *crq, struct srp_target *target)
++{
++ struct vio_port *vport = target_to_port(target);
++ dprintk("%x %x\n", crq->valid, crq->format);
++
++ switch (crq->valid) {
++ case 0xC0:
++ /* initialization */
++ switch (crq->format) {
++ case 0x01:
++ h_send_crq(vport->dma_dev->unit_address,
++ 0xC002000000000000, 0);
++ break;
++ case 0x02:
++ break;
++ default:
++ eprintk("Unknown format %u\n", crq->format);
++ }
++ break;
++ case 0xFF:
++ /* transport event */
++ break;
++ case 0x80:
++ /* real payload */
++ switch (crq->format) {
++ case VIOSRP_SRP_FORMAT:
++ case VIOSRP_MAD_FORMAT:
++ process_iu(crq, target);
++ break;
++ case VIOSRP_OS400_FORMAT:
++ case VIOSRP_AIX_FORMAT:
++ case VIOSRP_LINUX_FORMAT:
++ case VIOSRP_INLINE_FORMAT:
++ eprintk("Unsupported format %u\n", crq->format);
++ break;
++ default:
++ eprintk("Unknown format %u\n", crq->format);
++ }
++ break;
++ default:
++ eprintk("unknown message type 0x%02x!?\n", crq->valid);
++ }
++}
++
++static inline struct viosrp_crq *next_crq(struct crq_queue *queue)
++{
++ struct viosrp_crq *crq;
++ unsigned long flags;
++
++ spin_lock_irqsave(&queue->lock, flags);
++ crq = &queue->msgs[queue->cur];
++ if (crq->valid & 0x80) {
++ if (++queue->cur == queue->size)
++ queue->cur = 0;
++ } else
++ crq = NULL;
++ spin_unlock_irqrestore(&queue->lock, flags);
++
++ return crq;
++}
++
++static void handle_crq(void *data)
++{
++ struct srp_target *target = (struct srp_target *) data;
++ struct vio_port *vport = target_to_port(target);
++ struct viosrp_crq *crq;
++ int done = 0;
++
++ while (!done) {
++ while ((crq = next_crq(&vport->crq_queue)) != NULL) {
++ process_crq(crq, target);
++ crq->valid = 0x00;
++ }
++
++ vio_enable_interrupts(vport->dma_dev);
++
++ crq = next_crq(&vport->crq_queue);
++ if (crq) {
++ vio_disable_interrupts(vport->dma_dev);
++ process_crq(crq, target);
++ crq->valid = 0x00;
++ } else
++ done = 1;
++ }
++
++ handle_cmd_queue(target);
++}
++
++
++static int ibmvstgt_eh_abort_handler(struct scsi_cmnd *sc)
++{
++ unsigned long flags;
++ struct iu_entry *iue = (struct iu_entry *) sc->SCp.ptr;
++ struct srp_target *target = iue->target;
++
++ dprintk("%p %p %x\n", iue, target, vio_iu(iue)->srp.cmd.cdb[0]);
++
++ spin_lock_irqsave(&target->lock, flags);
++ list_del(&iue->ilist);
++ spin_unlock_irqrestore(&target->lock, flags);
++
++ srp_iu_put(iue);
++
++ return 0;
++}
++
++static int ibmvstgt_tsk_mgmt_response(u64 mid, int result)
++{
++ struct iu_entry *iue = (struct iu_entry *) ((void *) mid);
++ union viosrp_iu *iu = vio_iu(iue);
++ unsigned char status, asc;
++
++ eprintk("%p %d\n", iue, result);
++ status = NO_SENSE;
++ asc = 0;
++
++ switch (iu->srp.tsk_mgmt.tsk_mgmt_func) {
++ case SRP_TSK_ABORT_TASK:
++ asc = 0x14;
++ if (result)
++ status = ABORTED_COMMAND;
++ break;
++ default:
++ break;
++ }
++
++ send_rsp(iue, NULL, status, asc);
++ srp_iu_put(iue);
++
++ return 0;
++}
++
++static ssize_t system_id_show(struct class_device *cdev, char *buf)
++{
++ return snprintf(buf, PAGE_SIZE, "%s\n", system_id);
++}
++
++static ssize_t partition_number_show(struct class_device *cdev, char *buf)
++{
++ return snprintf(buf, PAGE_SIZE, "%x\n", partition_number);
++}
++
++static ssize_t unit_address_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct srp_target *target = host_to_srp_target(shost);
++ struct vio_port *vport = target_to_port(target);
++ return snprintf(buf, PAGE_SIZE, "%x\n", vport->dma_dev->unit_address);
++}
++
++static CLASS_DEVICE_ATTR(system_id, S_IRUGO, system_id_show, NULL);
++static CLASS_DEVICE_ATTR(partition_number, S_IRUGO, partition_number_show, NULL);
++static CLASS_DEVICE_ATTR(unit_address, S_IRUGO, unit_address_show, NULL);
++
++static struct class_device_attribute *ibmvstgt_attrs[] = {
++ &class_device_attr_system_id,
++ &class_device_attr_partition_number,
++ &class_device_attr_unit_address,
++ NULL,
++};
++
++static struct scsi_host_template ibmvstgt_sht = {
++ .name = TGT_NAME,
++ .module = THIS_MODULE,
++ .can_queue = INITIAL_SRP_LIMIT,
++ .sg_tablesize = SG_ALL,
++ .use_clustering = DISABLE_CLUSTERING,
++ .max_sectors = DEFAULT_MAX_SECTORS,
++ .transfer_response = ibmvstgt_cmd_done,
++ .transfer_data = ibmvstgt_transfer_data,
++ .eh_abort_handler = ibmvstgt_eh_abort_handler,
++ .tsk_mgmt_response = ibmvstgt_tsk_mgmt_response,
++ .shost_attrs = ibmvstgt_attrs,
++ .proc_name = TGT_NAME,
++};
++
++static int ibmvstgt_probe(struct vio_dev *dev, const struct vio_device_id *id)
++{
++ struct Scsi_Host *shost;
++ struct srp_target *target;
++ struct vio_port *vport;
++ unsigned int *dma, dma_size;
++ int err = -ENOMEM;
++
++ vport = kzalloc(sizeof(struct vio_port), GFP_KERNEL);
++ if (!vport)
++ return err;
++ shost = scsi_host_alloc(&ibmvstgt_sht, sizeof(struct srp_target));
++ if (!shost)
++ goto free_vport;
++ err = scsi_tgt_alloc_queue(shost);
++ if (err)
++ goto put_host;
++
++ target = host_to_srp_target(shost);
++ target->shost = shost;
++ vport->dma_dev = dev;
++ target->ldata = vport;
++ err = srp_target_alloc(target, &dev->dev, INITIAL_SRP_LIMIT,
++ SRP_MAX_IU_LEN);
++ if (err)
++ goto put_host;
++
++ dma = (unsigned int *) vio_get_attribute(dev, "ibm,my-dma-window",
++ &dma_size);
++ if (!dma || dma_size != 40) {
++ eprintk("Couldn't get window property %d\n", dma_size);
++ err = -EIO;
++ goto free_srp_target;
++ }
++ vport->liobn = dma[0];
++ vport->riobn = dma[5];
++
++ INIT_WORK(&vport->crq_work, handle_crq, target);
++
++ err = crq_queue_create(&vport->crq_queue, target);
++ if (err)
++ goto free_srp_target;
++
++ err = scsi_add_host(shost, target->dev);
++ if (err)
++ goto destroy_queue;
++ return 0;
++
++destroy_queue:
++ crq_queue_destroy(target);
++free_srp_target:
++ srp_target_free(target);
++put_host:
++ scsi_host_put(shost);
++free_vport:
++ kfree(vport);
++ return err;
++}
++
++static int ibmvstgt_remove(struct vio_dev *dev)
++{
++ struct srp_target *target = (struct srp_target *) dev->dev.driver_data;
++ struct Scsi_Host *shost = target->shost;
++ struct vio_port *vport = target->ldata;
++
++ crq_queue_destroy(target);
++ scsi_remove_host(shost);
++ scsi_tgt_free_queue(shost);
++ srp_target_free(target);
++ kfree(vport);
++ scsi_host_put(shost);
++ return 0;
++}
++
++static struct vio_device_id ibmvstgt_device_table[] __devinitdata = {
++ {"v-scsi-host", "IBM,v-scsi-host"},
++ {"",""}
++};
++
++MODULE_DEVICE_TABLE(vio, ibmvstgt_device_table);
++
++static struct vio_driver ibmvstgt_driver = {
++ .id_table = ibmvstgt_device_table,
++ .probe = ibmvstgt_probe,
++ .remove = ibmvstgt_remove,
++ .driver = {
++ .name = "ibmvscsis",
++ .owner = THIS_MODULE,
++ }
++};
++
++static int get_system_info(void)
++{
++ struct device_node *rootdn;
++ const char *id, *model, *name;
++ unsigned int *num;
++
++ rootdn = find_path_device("/");
++ if (!rootdn)
++ return -ENOENT;
++
++ model = get_property(rootdn, "model", NULL);
++ id = get_property(rootdn, "system-id", NULL);
++ if (model && id)
++ snprintf(system_id, sizeof(system_id), "%s-%s", model, id);
++
++ name = get_property(rootdn, "ibm,partition-name", NULL);
++ if (name)
++ strncpy(partition_name, name, sizeof(partition_name));
++
++ num = (unsigned int *) get_property(rootdn, "ibm,partition-no", NULL);
++ if (num)
++ partition_number = *num;
++
++ return 0;
++}
++
++static int ibmvstgt_init(void)
++{
++ int err = -ENOMEM;
++
++ printk("IBM eServer i/pSeries Virtual SCSI Target Driver\n");
++
++ vtgtd = create_workqueue("ibmvtgtd");
++ if (!vtgtd)
++ return err;
++
++ err = get_system_info();
++ if (err)
++ goto destroy_wq;
++
++ err = vio_register_driver(&ibmvstgt_driver);
++ if (err)
++ goto destroy_wq;
++
++ return 0;
++
++destroy_wq:
++ destroy_workqueue(vtgtd);
++ return err;
++}
++
++static void ibmvstgt_exit(void)
++{
++ printk("Unregister IBM virtual SCSI driver\n");
++
++ destroy_workqueue(vtgtd);
++ vio_unregister_driver(&ibmvstgt_driver);
++}
++
++MODULE_DESCRIPTION("IBM Virtual SCSI Target");
++MODULE_AUTHOR("Dave Boutcher");
++MODULE_LICENSE("GPL");
++
++module_init(ibmvstgt_init);
++module_exit(ibmvstgt_exit);
+diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c
+new file mode 100644
+index 0000000..ddb4ef5
+--- /dev/null
++++ b/drivers/scsi/libsrp.c
+@@ -0,0 +1,442 @@
++/*
++ * SCSI RDMA Protocol lib functions
++ *
++ * Copyright (C) 2006 FUJITA Tomonori <tomof@xxxxxxx>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License as
++ * published by the Free Software Foundation; either version 2 of the
++ * License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
++ * 02110-1301 USA
++ */
++#include <linux/err.h>
++#include <linux/kfifo.h>
++#include <linux/scatterlist.h>
++#include <linux/dma-mapping.h>
++#include <linux/pci.h>
++#include <scsi/scsi.h>
++#include <scsi/scsi_cmnd.h>
++#include <scsi/scsi_tcq.h>
++#include <scsi/scsi_tgt.h>
++#include <scsi/srp.h>
++#include <scsi/libsrp.h>
++
++enum srp_task_attributes {
++ SRP_SIMPLE_TASK = 0,
++ SRP_HEAD_TASK = 1,
++ SRP_ORDERED_TASK = 2,
++ SRP_ACA_TASK = 4
++};
++
++/* tmp - will replace with SCSI logging stuff */
++#define eprintk(fmt, args...) \
++do { \
++ printk("%s(%d) " fmt, __FUNCTION__, __LINE__, ##args); \
++} while (0)
++/* #define dprintk eprintk */
++#define dprintk(fmt, args...)
++
++static int srp_iu_pool_alloc(struct srp_queue *q, size_t max,
++ struct srp_buf **ring)
++{
++ int i;
++ struct iu_entry *iue;
++
++ q->pool = kcalloc(max, sizeof(struct iu_entry *), GFP_KERNEL);
++ if (!q->pool)
++ return -ENOMEM;
++ q->items = kcalloc(max, sizeof(struct iu_entry), GFP_KERNEL);
++ if (!q->items)
++ goto free_pool;
++
++ spin_lock_init(&q->lock);
++ q->queue = kfifo_init((void *) q->pool, max * sizeof(void *),
++ GFP_KERNEL, &q->lock);
++ if (IS_ERR(q->queue))
++ goto free_item;
++
++ for (i = 0, iue = q->items; i < max; i++) {
++ __kfifo_put(q->queue, (void *) &iue, sizeof(void *));
++ iue->sbuf = ring[i];
++ iue++;
++ }
++ return 0;
++
++free_item:
++ kfree(q->items);
++free_pool:
++ kfree(q->pool);
++ return -ENOMEM;
++}
++
++static void srp_iu_pool_free(struct srp_queue *q)
++{
++ kfree(q->items);
++ kfree(q->pool);
++}
++
++static struct srp_buf **srp_ring_alloc(struct device *dev,
++ size_t max, size_t size)
++{
++ int i;
++ struct srp_buf **ring;
++
++ ring = kcalloc(max, sizeof(struct srp_buf *), GFP_KERNEL);
++ if (!ring)
++ return NULL;
++
++ for (i = 0; i < max; i++) {
++ ring[i] = kzalloc(sizeof(struct srp_buf), GFP_KERNEL);
++ if (!ring[i])
++ goto out;
++ ring[i]->buf = dma_alloc_coherent(dev, size, &ring[i]->dma,
++ GFP_KERNEL);
++ if (!ring[i]->buf)
++ goto out;
++ }
++ return ring;
++
++out:
++ for (i = 0; i < max && ring[i]; i++) {
++ if (ring[i]->buf)
++ dma_free_coherent(dev, size, ring[i]->buf, ring[i]->dma);
++ kfree(ring[i]);
++ }
++ kfree(ring);
++
++ return NULL;
++}
++
+static void srp_ring_free(struct device *dev, struct srp_buf **ring, size_t max,
++ size_t size)
++{
++ int i;
++
++ for (i = 0; i < max; i++) {
++ dma_free_coherent(dev, size, ring[i]->buf, ring[i]->dma);
++ kfree(ring[i]);
++ }
++}
++
++int srp_target_alloc(struct srp_target *target, struct device *dev,
++ size_t nr, size_t iu_size)
++{
++ int err;
++
++ spin_lock_init(&target->lock);
++ INIT_LIST_HEAD(&target->cmd_queue);
++
++ target->dev = dev;
++ target->dev->driver_data = target;
++
++ target->srp_iu_size = iu_size;
++ target->rx_ring_size = nr;
++ target->rx_ring = srp_ring_alloc(target->dev, nr, iu_size);
++ if (!target->rx_ring)
++ return -ENOMEM;
++ err = srp_iu_pool_alloc(&target->iu_queue, nr, target->rx_ring);
++ if (err)
++ goto free_ring;
++
++ return 0;
++
++free_ring:
++ srp_ring_free(target->dev, target->rx_ring, nr, iu_size);
++ return -ENOMEM;
++}
++EXPORT_SYMBOL_GPL(srp_target_alloc);
++
++void srp_target_free(struct srp_target *target)
++{
++ srp_ring_free(target->dev, target->rx_ring, target->rx_ring_size,
++ target->srp_iu_size);
++ srp_iu_pool_free(&target->iu_queue);
++}
++EXPORT_SYMBOL_GPL(srp_target_free);
++
++struct iu_entry *srp_iu_get(struct srp_target *target)
++{
++ struct iu_entry *iue = NULL;
++
++ kfifo_get(target->iu_queue.queue, (void *) &iue, sizeof(void *));
++ if (!iue)
++ return iue;
++ iue->target = target;
++ INIT_LIST_HEAD(&iue->ilist);
++ iue->flags = 0;
++ return iue;
++}
++EXPORT_SYMBOL_GPL(srp_iu_get);
++
++void srp_iu_put(struct iu_entry *iue)
++{
++ kfifo_put(iue->target->iu_queue.queue, (void *) &iue, sizeof(void *));
++}
++EXPORT_SYMBOL_GPL(srp_iu_put);
++
++static int srp_direct_data(struct scsi_cmnd *sc, struct srp_direct_buf *md,
++ enum dma_data_direction dir, srp_rdma_t rdma_io,
++ int dma_map, int ext_desc)
++{
++ struct iu_entry *iue = NULL;
++ struct scatterlist *sg = NULL;
++ int err, nsg = 0, len;
++
++ if (dma_map) {
++ iue = (struct iu_entry *) sc->SCp.ptr;
++ sg = sc->request_buffer;
++
++ dprintk("%p %u %u %d\n", iue, sc->request_bufflen,
++ md->len, sc->use_sg);
++
++ nsg = dma_map_sg(iue->target->dev, sg, sc->use_sg,
++ DMA_BIDIRECTIONAL);
++ if (!nsg) {
++ printk("fail to map %p %d\n", iue, sc->use_sg);
++ return 0;
++ }
++ len = min(sc->request_bufflen, md->len);
++ } else
++ len = md->len;
++
++ err = rdma_io(sc, sg, nsg, md, 1, dir, len);
++
++ if (dma_map)
++ dma_unmap_sg(iue->target->dev, sg, nsg, DMA_BIDIRECTIONAL);
++
++ return err;
++}
++
++static int srp_indirect_data(struct scsi_cmnd *sc, struct srp_cmd *cmd,
++ struct srp_indirect_buf *id,
++ enum dma_data_direction dir, srp_rdma_t rdma_io,
++ int dma_map, int ext_desc)
++{
++ struct iu_entry *iue = NULL;
++ struct srp_direct_buf *md = NULL;
++ struct scatterlist dummy, *sg = NULL;
++ dma_addr_t token = 0;
++ long err;
++ unsigned int done = 0;
++ int nmd, nsg = 0, len;
++
++ if (dma_map || ext_desc) {
++ iue = (struct iu_entry *) sc->SCp.ptr;
++ sg = sc->request_buffer;
++
++ dprintk("%p %u %u %d %d\n",
++ iue, sc->request_bufflen, id->len,
++ cmd->data_in_desc_cnt, cmd->data_out_desc_cnt);
++ }
++
++ nmd = id->table_desc.len / sizeof(struct srp_direct_buf);
++
++ if ((dir == DMA_FROM_DEVICE && nmd == cmd->data_in_desc_cnt) ||
++ (dir == DMA_TO_DEVICE && nmd == cmd->data_out_desc_cnt)) {
++ md = &id->desc_list[0];
++ goto rdma;
++ }
++
++ if (ext_desc && dma_map) {
++ md = dma_alloc_coherent(iue->target->dev, id->table_desc.len,
++ &token, GFP_KERNEL);
++ if (!md) {
++ eprintk("Can't get dma memory %u\n",
id->table_desc.len);
++ return -ENOMEM;
++ }
++
++ sg_init_one(&dummy, md, id->table_desc.len);
++ sg_dma_address(&dummy) = token;
++ err = rdma_io(sc, &dummy, 1, &id->table_desc, 1, DMA_TO_DEVICE,
++ id->table_desc.len);
++ if (err < 0) {
++ eprintk("Error copying indirect table %ld\n", err);
++ goto free_mem;
++ }
++ } else {
++ eprintk("This command uses external indirect buffer\n");
++ return -EINVAL;
++ }
++
++rdma:
++ if (dma_map) {
++ nsg = dma_map_sg(iue->target->dev, sg, sc->use_sg, DMA_BIDIRECTIONAL);
++ if (!nsg) {
++ eprintk("fail to map %p %d\n", iue, sc->use_sg);
++ goto free_mem;
++ }
++ len = min(sc->request_bufflen, id->len);
++ } else
++ len = id->len;
++
++ err = rdma_io(sc, sg, nsg, md, nmd, dir, len);
++
++ if (dma_map)
++ dma_unmap_sg(iue->target->dev, sg, nsg, DMA_BIDIRECTIONAL);
++
++free_mem:
++ if (token && dma_map)
++ dma_free_coherent(iue->target->dev, id->table_desc.len, md, token);
++
++ return done;
++}
++
++static int data_out_desc_size(struct srp_cmd *cmd)
++{
++ int size = 0;
++ u8 fmt = cmd->buf_fmt >> 4;
++
++ switch (fmt) {
++ case SRP_NO_DATA_DESC:
++ break;
++ case SRP_DATA_DESC_DIRECT:
++ size = sizeof(struct srp_direct_buf);
++ break;
++ case SRP_DATA_DESC_INDIRECT:
++ size = sizeof(struct srp_indirect_buf) +
++ sizeof(struct srp_direct_buf) * cmd->data_out_desc_cnt;
++ break;
++ default:
++ eprintk("client error. Invalid data_out_format %x\n", fmt);
++ break;
++ }
++ return size;
++}
++
++/*
++ * TODO: this can be called multiple times for a single command if it
++ * has very long data.
++ */
++int srp_transfer_data(struct scsi_cmnd *sc, struct srp_cmd *cmd,
++ srp_rdma_t rdma_io, int dma_map, int ext_desc)
++{
++ struct srp_direct_buf *md;
++ struct srp_indirect_buf *id;
++ enum dma_data_direction dir;
++ int offset, err = 0;
++ u8 format;
++
++ offset = cmd->add_cdb_len * 4;
++
++ dir = srp_cmd_direction(cmd);
++ if (dir == DMA_FROM_DEVICE)
++ offset += data_out_desc_size(cmd);
++
++ if (dir == DMA_TO_DEVICE)
++ format = cmd->buf_fmt >> 4;
++ else
++ format = cmd->buf_fmt & ((1U << 4) - 1);
++
++ switch (format) {
++ case SRP_NO_DATA_DESC:
++ break;
++ case SRP_DATA_DESC_DIRECT:
++ md = (struct srp_direct_buf *)
++ (cmd->add_data + offset);
++ err = srp_direct_data(sc, md, dir, rdma_io, dma_map, ext_desc);
++ break;
++ case SRP_DATA_DESC_INDIRECT:
++ id = (struct srp_indirect_buf *)
++ (cmd->add_data + offset);
++ err = srp_indirect_data(sc, cmd, id, dir, rdma_io, dma_map,
++ ext_desc);
++ break;
++ default:
++ eprintk("Unknown format %d %x\n", dir, format);
++ break;
++ }
++
++ return err;
++}
++EXPORT_SYMBOL_GPL(srp_transfer_data);
++
+static int vscsis_data_length(struct srp_cmd *cmd, enum dma_data_direction dir)
++{
++ struct srp_direct_buf *md;
++ struct srp_indirect_buf *id;
++ int len = 0, offset = cmd->add_cdb_len * 4;
++ u8 fmt;
++
++ if (dir == DMA_TO_DEVICE)
++ fmt = cmd->buf_fmt >> 4;
++ else {
++ fmt = cmd->buf_fmt & ((1U << 4) - 1);
++ offset += data_out_desc_size(cmd);
++ }
++
++ switch (fmt) {
++ case SRP_NO_DATA_DESC:
++ break;
++ case SRP_DATA_DESC_DIRECT:
++ md = (struct srp_direct_buf *) (cmd->add_data + offset);
++ len = md->len;
++ break;
++ case SRP_DATA_DESC_INDIRECT:
++ id = (struct srp_indirect_buf *) (cmd->add_data + offset);
++ len = id->len;
++ break;
++ default:
++ eprintk("invalid data format %x\n", fmt);
++ break;
++ }
++ return len;
++}
++
++int srp_cmd_queue(struct Scsi_Host *shost, struct srp_cmd *cmd, void *info,
++ u64 addr)
++{
++ enum dma_data_direction dir;
++ struct scsi_cmnd *sc;
++ int tag, len, err;
++
++ switch (cmd->task_attr) {
++ case SRP_SIMPLE_TASK:
++ tag = MSG_SIMPLE_TAG;
++ break;
++ case SRP_ORDERED_TASK:
++ tag = MSG_ORDERED_TAG;
++ break;
++ case SRP_HEAD_TASK:
++ tag = MSG_HEAD_TAG;
++ break;
++ default:
++ eprintk("Task attribute %d not supported\n", cmd->task_attr);
++ tag = MSG_ORDERED_TAG;
++ }
++
++ dir = srp_cmd_direction(cmd);
++ len = vscsis_data_length(cmd, dir);
++
++ dprintk("%p %x %lx %d %d %d %llx\n", info, cmd->cdb[0],
++ cmd->lun, dir, len, tag, (unsigned long long) cmd->tag);
++
++ sc = scsi_host_get_command(shost, dir, GFP_KERNEL);
++ if (!sc)
++ return -ENOMEM;
++
++ sc->SCp.ptr = info;
++ memcpy(sc->cmnd, cmd->cdb, MAX_COMMAND_SIZE);
++ sc->request_bufflen = len;
++ sc->request_buffer = (void *)(unsigned long)addr;
++ sc->tag = tag;
++ sc->host_scribble = (void *)shost;
++ err = scsi_tgt_queue_command(sc, (struct scsi_lun *) &cmd->lun, cmd->tag);
++ if (err)
++ scsi_host_put_command(shost, sc);
++
++ return err;
++}
++EXPORT_SYMBOL_GPL(srp_cmd_queue);
++
++MODULE_DESCRIPTION("SCSI RDAM Protocol lib functions");
++MODULE_AUTHOR("FUJITA Tomonori");
++MODULE_LICENSE("GPL");
+diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
+index c551bb8..1d2fbe0 100644
+--- a/drivers/scsi/scsi.c
++++ b/drivers/scsi/scsi.c
+@@ -212,8 +212,7 @@ static struct scsi_host_cmd_pool scsi_cm
+
+ static DEFINE_MUTEX(host_cmd_pool_mutex);
+
+-static struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *shost,
+- gfp_t gfp_mask)
++struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *shost, gfp_t gfp_mask)
+ {
+ struct scsi_cmnd *cmd;
+
+@@ -234,6 +233,7 @@ static struct scsi_cmnd *__scsi_get_comm
+
+ return cmd;
+ }
++EXPORT_SYMBOL_GPL(__scsi_get_command);
+
+ /*
+ * Function: scsi_get_command()
+@@ -270,9 +270,29 @@ struct scsi_cmnd *scsi_get_command(struc
+ put_device(&dev->sdev_gendev);
+
+ return cmd;
+-}
++}
+ EXPORT_SYMBOL(scsi_get_command);
+
++void __scsi_put_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd,
++ struct device *dev)
++{
++ unsigned long flags;
++
++ /* changing locks here, don't need to restore the irq state */
++ spin_lock_irqsave(&shost->free_list_lock, flags);
++ if (unlikely(list_empty(&shost->free_list))) {
++ list_add(&cmd->list, &shost->free_list);
++ cmd = NULL;
++ }
++ spin_unlock_irqrestore(&shost->free_list_lock, flags);
++
++ if (likely(cmd != NULL))
++ kmem_cache_free(shost->cmd_pool->slab, cmd);
++
++ put_device(dev);
++}
++EXPORT_SYMBOL(__scsi_put_command);
++
+ /*
+ * Function: scsi_put_command()
+ *
+@@ -287,26 +307,15 @@ EXPORT_SYMBOL(scsi_get_command);
+ void scsi_put_command(struct scsi_cmnd *cmd)
+ {
+ struct scsi_device *sdev = cmd->device;
+- struct Scsi_Host *shost = sdev->host;
+ unsigned long flags;
+-
++
+ /* serious error if the command hasn't come from a device list */
+ spin_lock_irqsave(&cmd->device->list_lock, flags);
+ BUG_ON(list_empty(&cmd->list));
+ list_del_init(&cmd->list);
+- spin_unlock(&cmd->device->list_lock);
+- /* changing locks here, don't need to restore the irq state */
+- spin_lock(&shost->free_list_lock);
+- if (unlikely(list_empty(&shost->free_list))) {
+- list_add(&cmd->list, &shost->free_list);
+- cmd = NULL;
+- }
+- spin_unlock_irqrestore(&shost->free_list_lock, flags);
++ spin_unlock_irqrestore(&cmd->device->list_lock, flags);
+
+- if (likely(cmd != NULL))
+- kmem_cache_free(shost->cmd_pool->slab, cmd);
+-
+- put_device(&sdev->sdev_gendev);
++ __scsi_put_command(cmd->device->host, cmd, &sdev->sdev_gendev);
+ }
+ EXPORT_SYMBOL(scsi_put_command);
+
+diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
+index a0cd6de..d94ea0f 100644
+--- a/drivers/scsi/scsi_lib.c
++++ b/drivers/scsi/scsi_lib.c
+@@ -804,7 +804,7 @@ static struct scsi_cmnd *scsi_end_reques
+ return NULL;
+ }
+
+-static struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
++struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
+ {
+ struct scsi_host_sg_pool *sgp;
+ struct scatterlist *sgl;
+@@ -845,7 +845,9 @@ #endif
+ return sgl;
+ }
+
+-static void scsi_free_sgtable(struct scatterlist *sgl, int index)
++EXPORT_SYMBOL(scsi_alloc_sgtable);
++
++void scsi_free_sgtable(struct scatterlist *sgl, int index)
+ {
+ struct scsi_host_sg_pool *sgp;
+
+@@ -855,6 +857,8 @@ static void scsi_free_sgtable(struct sca
+ mempool_free(sgl, sgp->pool);
+ }
+
++EXPORT_SYMBOL(scsi_free_sgtable);
++
+ /*
+ * Function: scsi_release_buffers()
+ *
+@@ -1687,29 +1691,40 @@ u64 scsi_calculate_bounce_limit(struct S
+ }
+ EXPORT_SYMBOL(scsi_calculate_bounce_limit);
+
+-struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
++struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
++ request_fn_proc *request_fn)
+ {
+- struct Scsi_Host *shost = sdev->host;
+ struct request_queue *q;
+
+- q = blk_init_queue(scsi_request_fn, NULL);
++ q = blk_init_queue(request_fn, NULL);
+ if (!q)
+ return NULL;
+
+- blk_queue_prep_rq(q, scsi_prep_fn);
+-
+ blk_queue_max_hw_segments(q, shost->sg_tablesize);
+ blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS);
+ blk_queue_max_sectors(q, shost->max_sectors);
+ blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
+ blk_queue_segment_boundary(q, shost->dma_boundary);
+- blk_queue_issue_flush_fn(q, scsi_issue_flush_fn);
+- blk_queue_softirq_done(q, scsi_softirq_done);
+
+ if (!shost->use_clustering)
+ clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+ return q;
+ }
++EXPORT_SYMBOL(__scsi_alloc_queue);
++
++struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
++{
++ struct request_queue *q;
++
++ q = __scsi_alloc_queue(sdev->host, scsi_request_fn);
++ if (!q)
++ return NULL;
++
++ blk_queue_prep_rq(q, scsi_prep_fn);
++ blk_queue_issue_flush_fn(q, scsi_issue_flush_fn);
++ blk_queue_softirq_done(q, scsi_softirq_done);
++ return q;
++}
+
+ void scsi_free_queue(struct request_queue *q)
+ {
+diff --git a/drivers/scsi/scsi_tgt_if.c b/drivers/scsi/scsi_tgt_if.c
+new file mode 100644
+index 0000000..55bb961
+--- /dev/null
++++ b/drivers/scsi/scsi_tgt_if.c
+@@ -0,0 +1,351 @@
++/*
++ * SCSI target kernel/user interface functions
++ *
++ * Copyright (C) 2005 FUJITA Tomonori <tomof@xxxxxxx>
++ * Copyright (C) 2005 Mike Christie <michaelc@xxxxxxxxxxx>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License as
++ * published by the Free Software Foundation; either version 2 of the
++ * License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
++ * 02110-1301 USA
++ */
++#include <linux/miscdevice.h>
++#include <linux/file.h>
++#include <net/tcp.h>
++#include <scsi/scsi.h>
++#include <scsi/scsi_cmnd.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_tgt.h>
++#include <scsi/scsi_tgt_if.h>
++
++#include "scsi_tgt_priv.h"
++
++struct tgt_ring {
++ u32 tr_idx;
++ unsigned long tr_pages[TGT_RING_PAGES];
++ spinlock_t tr_lock;
++};
++
++/* tx_ring : kernel->user, rx_ring : user->kernel */
++static struct tgt_ring tx_ring, rx_ring;
++static DECLARE_WAIT_QUEUE_HEAD(tgt_poll_wait);
++
++static inline void tgt_ring_idx_inc(struct tgt_ring *ring)
++{
++ if (ring->tr_idx == TGT_MAX_EVENTS - 1)
++ ring->tr_idx = 0;
++ else
++ ring->tr_idx++;
++}
++
++static struct tgt_event *tgt_head_event(struct tgt_ring *ring, u32 idx)
++{
++ u32 pidx, off;
++
++ pidx = idx / TGT_EVENT_PER_PAGE;
++ off = idx % TGT_EVENT_PER_PAGE;
++
++ return (struct tgt_event *)
++ (ring->tr_pages[pidx] + sizeof(struct tgt_event) * off);
++}
++
++static int tgt_uspace_send_event(u32 type, struct tgt_event *p)
++{
++ struct tgt_event *ev;
++ struct tgt_ring *ring = &tx_ring;
++ unsigned long flags;
++ int err = 0;
++
++ spin_lock_irqsave(&ring->tr_lock, flags);
++
++ ev = tgt_head_event(ring, ring->tr_idx);
++ if (!ev->hdr.status)
++ tgt_ring_idx_inc(ring);
++ else
++ err = -EBUSY;
++
++ spin_unlock_irqrestore(&ring->tr_lock, flags);
++
++ if (err)
++ return err;
++
++ memcpy(ev, p, sizeof(*ev));
++ ev->hdr.type = type;
++ mb();
++ ev->hdr.status = 1;
++
++ flush_dcache_page(virt_to_page(ev));
++
++ wake_up_interruptible(&tgt_poll_wait);
++
++ return 0;
++}
++
++int scsi_tgt_uspace_send_cmd(struct scsi_cmnd *cmd, struct scsi_lun *lun, u64 tag)
++{
++ struct Scsi_Host *shost = scsi_tgt_cmd_to_host(cmd);
++ struct tgt_event ev;
++ int err;
++
++ memset(&ev, 0, sizeof(ev));
++ ev.p.cmd_req.host_no = shost->host_no;
++ ev.p.cmd_req.data_len = cmd->request_bufflen;
++ memcpy(ev.p.cmd_req.scb, cmd->cmnd, sizeof(ev.p.cmd_req.scb));
++ memcpy(ev.p.cmd_req.lun, lun, sizeof(ev.p.cmd_req.lun));
++ ev.p.cmd_req.attribute = cmd->tag;
++ ev.p.cmd_req.uaddr = (unsigned long)cmd->request_buffer;
++ ev.p.cmd_req.tag = tag;
++
++ dprintk("%p %d %u %x %llx\n", cmd, shost->host_no,
++ ev.p.cmd_req.data_len, cmd->tag,
++ (unsigned long long) ev.p.cmd_req.tag);
++
++ err = tgt_uspace_send_event(TGT_KEVENT_CMD_REQ, &ev);
++ if (err)
++ eprintk("tx buf is full, could not send\n");
++
++ return err;
++}
++
++int scsi_tgt_uspace_send_status(struct scsi_cmnd *cmd, u64 tag)
++{
++ struct Scsi_Host *shost = scsi_tgt_cmd_to_host(cmd);
++ struct tgt_event ev;
++ int err;
++
++ memset(&ev, 0, sizeof(ev));
++ ev.p.cmd_done.host_no = shost->host_no;
++ ev.p.cmd_done.tag = tag;
++ ev.p.cmd_done.result = cmd->result;
++
++ dprintk("%p %d %llu %u %x\n", cmd, shost->host_no,
++ (unsigned long long) ev.p.cmd_req.tag,
++ ev.p.cmd_req.data_len, cmd->tag);
++
++ err = tgt_uspace_send_event(TGT_KEVENT_CMD_DONE, &ev);
++ if (err)
++ eprintk("tx buf is full, could not send\n");
++
++ return err;
++}
++
++int scsi_tgt_uspace_send_tsk_mgmt(int host_no, int function, u64 tag,
++ struct scsi_lun *scsilun, void *data)
++{
++ struct tgt_event ev;
++ int err;
++
++ memset(&ev, 0, sizeof(ev));
++ ev.p.tsk_mgmt_req.host_no = host_no;
++ ev.p.tsk_mgmt_req.function = function;
++ ev.p.tsk_mgmt_req.tag = tag;
++ memcpy(ev.p.tsk_mgmt_req.lun, scsilun, sizeof(ev.p.tsk_mgmt_req.lun));
++ ev.p.tsk_mgmt_req.mid = (u64) (unsigned long) data;
++
++ dprintk("%d %x %llx %llx\n", host_no, function, (unsigned long long)
tag,
++ (unsigned long long) ev.p.tsk_mgmt_req.mid);
++
++ err = tgt_uspace_send_event(TGT_KEVENT_TSK_MGMT_REQ, &ev);
++ if (err)
++ eprintk("tx buf is full, could not send\n");
++
++ return err;
++}
++
++static int event_recv_msg(struct tgt_event *ev)
++{
++ int err = 0;
++
++ switch (ev->hdr.type) {
++ case TGT_UEVENT_CMD_RSP:
++ err = scsi_tgt_kspace_exec(ev->p.cmd_rsp.host_no,
++ ev->p.cmd_rsp.tag,
++ ev->p.cmd_rsp.result,
++ ev->p.cmd_rsp.len,
++ ev->p.cmd_rsp.uaddr,
++ ev->p.cmd_rsp.rw);
++ break;
++ case TGT_UEVENT_TSK_MGMT_RSP:
++ err = scsi_tgt_kspace_tsk_mgmt(ev->p.tsk_mgmt_rsp.host_no,
++ ev->p.tsk_mgmt_rsp.mid,
++ ev->p.tsk_mgmt_rsp.result);
++ break;
++ default:
++ eprintk("unknown type %d\n", ev->hdr.type);
++ err = -EINVAL;
++ }
++
++ return err;
++}
++
++static ssize_t tgt_write(struct file *file, const char __user * buffer,
++ size_t count, loff_t * ppos)
++{
++ struct tgt_event *ev;
++ struct tgt_ring *ring = &rx_ring;
++
++ while (1) {
++ ev = tgt_head_event(ring, ring->tr_idx);
++ /* do we need this? */
++ flush_dcache_page(virt_to_page(ev));
++
++ if (!ev->hdr.status)
++ break;
++
++ tgt_ring_idx_inc(ring);
++ event_recv_msg(ev);
++ ev->hdr.status = 0;
++ };
++
++ return count;
++}
++
++static unsigned int tgt_poll(struct file * file, struct poll_table_struct *wait)
++{
++ struct tgt_event *ev;
++ struct tgt_ring *ring = &tx_ring;
++ unsigned long flags;
++ unsigned int mask = 0;
++ u32 idx;
++
++ poll_wait(file, &tgt_poll_wait, wait);
++
++ spin_lock_irqsave(&ring->tr_lock, flags);
++
++ idx = ring->tr_idx ? ring->tr_idx - 1 : TGT_MAX_EVENTS - 1;
++ ev = tgt_head_event(ring, idx);
++ if (ev->hdr.status)
++ mask |= POLLIN | POLLRDNORM;
++
++ spin_unlock_irqrestore(&ring->tr_lock, flags);
++
++ return mask;
++}
++
++static int uspace_ring_map(struct vm_area_struct *vma, unsigned long addr,
++ struct tgt_ring *ring)
++{
++ int i, err;
++
++ for (i = 0; i < TGT_RING_PAGES; i++) {
++ struct page *page = virt_to_page(ring->tr_pages[i]);
++ err = vm_insert_page(vma, addr, page);
++ if (err)
++ return err;
++ addr += PAGE_SIZE;
++ }
++
++ return 0;
++}
++
++static int tgt_mmap(struct file *filp, struct vm_area_struct *vma)
++{
++ unsigned long addr;
++ int err;
++
++ if (vma->vm_pgoff)
++ return -EINVAL;
++
++ if (vma->vm_end - vma->vm_start != TGT_RING_SIZE * 2) {
++ eprintk("mmap size must be %lu, not %lu \n",
++ TGT_RING_SIZE * 2, vma->vm_end - vma->vm_start);
++ return -EINVAL;
++ }
++
++ addr = vma->vm_start;
++ err = uspace_ring_map(vma, addr, &tx_ring);
++ if (err)
++ return err;
++ err = uspace_ring_map(vma, addr + TGT_RING_SIZE, &rx_ring);
++
++ return err;
++}
++
++static int tgt_open(struct inode *inode, struct file *file)
++{
++ tx_ring.tr_idx = rx_ring.tr_idx = 0;
++
++ return 0;
++}
++
++static struct file_operations tgt_fops = {
++ .owner = THIS_MODULE,
++ .open = tgt_open,
++ .poll = tgt_poll,
++ .write = tgt_write,
++ .mmap = tgt_mmap,
++};
++
++static struct miscdevice tgt_miscdev = {
++ .minor = MISC_DYNAMIC_MINOR,
++ .name = "tgt",
++ .fops = &tgt_fops,
++};
++
++static void tgt_ring_exit(struct tgt_ring *ring)
++{
++ int i;
++
++ for (i = 0; i < TGT_RING_PAGES; i++)
++ free_page(ring->tr_pages[i]);
++}
++
++static int tgt_ring_init(struct tgt_ring *ring)
++{
++ int i;
++
++ spin_lock_init(&ring->tr_lock);
++
++ for (i = 0; i < TGT_RING_PAGES; i++) {
++ ring->tr_pages[i] = get_zeroed_page(GFP_KERNEL);
++ if (!ring->tr_pages[i]) {
++ eprintk("out of memory\n");
++ return -ENOMEM;
++ }
++ }
++
++ return 0;
++}
++
++void scsi_tgt_if_exit(void)
++{
++ tgt_ring_exit(&tx_ring);
++ tgt_ring_exit(&rx_ring);
++ misc_deregister(&tgt_miscdev);
++}
++
++int scsi_tgt_if_init(void)
++{
++ int err;
++
++ err = tgt_ring_init(&tx_ring);
++ if (err)
++ return err;
++
++ err = tgt_ring_init(&rx_ring);
++ if (err)
++ goto free_tx_ring;
++
++ err = misc_register(&tgt_miscdev);
++ if (err)
++ goto free_rx_ring;
++
++ return 0;
++free_rx_ring:
++ tgt_ring_exit(&rx_ring);
++free_tx_ring:
++ tgt_ring_exit(&tx_ring);
++
++ return err;
++}
+diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c
+new file mode 100644
+index 0000000..0211b5c
+--- /dev/null
++++ b/drivers/scsi/scsi_tgt_lib.c
+@@ -0,0 +1,741 @@
++/*
++ * SCSI target lib functions
++ *
++ * Copyright (C) 2005 Mike Christie <michaelc@xxxxxxxxxxx>
++ * Copyright (C) 2005 FUJITA Tomonori <tomof@xxxxxxx>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License as
++ * published by the Free Software Foundation; either version 2 of the
++ * License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
++ * 02110-1301 USA
++ */
++#include <linux/blkdev.h>
++#include <linux/hash.h>
++#include <linux/module.h>
++#include <linux/pagemap.h>
++#include <scsi/scsi.h>
++#include <scsi/scsi_cmnd.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_tgt.h>
++#include <../drivers/md/dm-bio-list.h>
++
++#include "scsi_tgt_priv.h"
++
++static struct workqueue_struct *scsi_tgtd;
++static kmem_cache_t *scsi_tgt_cmd_cache;
++
++/*
++ * TODO: this struct will be killed when the block layer supports large bios
++ * and James's work struct code is in
++ */
++struct scsi_tgt_cmd {
++ /* TODO replace work with James b's code */
++ struct work_struct work;
++ /* TODO replace the lists with a large bio */
++ struct bio_list xfer_done_list;
++ struct bio_list xfer_list;
++
++ struct list_head hash_list;
++ struct request *rq;
++ u64 tag;
++
++ void *buffer;
++ unsigned bufflen;
++};
++
++#define TGT_HASH_ORDER 4
++#define cmd_hashfn(tag) hash_long((unsigned long) (tag), TGT_HASH_ORDER)
++
++struct scsi_tgt_queuedata {
++ struct Scsi_Host *shost;
++ struct list_head cmd_hash[1 << TGT_HASH_ORDER];
++ spinlock_t cmd_hash_lock;
++};
++
++/*
++ * Function: scsi_host_get_command()
++ *
++ * Purpose: Allocate and setup a scsi command block and blk request
++ *
++ * Arguments: shost - scsi host
++ * data_dir - dma data dir
++ * gfp_mask - allocator flags
++ *
++ * Returns: The allocated scsi command structure.
++ *
++ * This should be called by target LLDs to get a command.
++ */
++struct scsi_cmnd *scsi_host_get_command(struct Scsi_Host *shost,
++ enum dma_data_direction data_dir,
++ gfp_t gfp_mask)
++{
++ int write = (data_dir == DMA_TO_DEVICE);
++ struct request *rq;
++ struct scsi_cmnd *cmd;
++ struct scsi_tgt_cmd *tcmd;
++
++ /* Bail if we can't get a reference to the device */
++ if (!get_device(&shost->shost_gendev))
++ return NULL;
++
++ tcmd = kmem_cache_alloc(scsi_tgt_cmd_cache, GFP_ATOMIC);
++ if (!tcmd)
++ goto put_dev;
++
++ rq = blk_get_request(shost->uspace_req_q, write, gfp_mask);
++ if (!rq)
++ goto free_tcmd;
++
++ cmd = __scsi_get_command(shost, gfp_mask);
++ if (!cmd)
++ goto release_rq;
++
++ memset(cmd, 0, sizeof(*cmd));
++ cmd->sc_data_direction = data_dir;
++ cmd->jiffies_at_alloc = jiffies;
++ cmd->request = rq;
++
++ rq->special = cmd;
++ rq->flags |= REQ_BLOCK_PC | REQ_SPECIAL;
++ rq->end_io_data = tcmd;
++
++ bio_list_init(&tcmd->xfer_list);
++ bio_list_init(&tcmd->xfer_done_list);
++ tcmd->rq = rq;
++
++ return cmd;
++
++release_rq:
++ blk_put_request(rq);
++free_tcmd:
++ kmem_cache_free(scsi_tgt_cmd_cache, tcmd);
++put_dev:
++ put_device(&shost->shost_gendev);
++ return NULL;
++
++}
++EXPORT_SYMBOL_GPL(scsi_host_get_command);
++
++/*
++ * Function: scsi_host_put_command()
++ *
++ * Purpose: Free a scsi command block
++ *
++ * Arguments: shost - scsi host
++ * cmd - command block to free
++ *
++ * Returns: Nothing.
++ *
++ * Notes: The command must not belong to any lists.
++ */
++void scsi_host_put_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
++{
++ struct request_queue *q = shost->uspace_req_q;
++ struct request *rq = cmd->request;
++ struct scsi_tgt_cmd *tcmd = rq->end_io_data;
++ unsigned long flags;
++
++ kmem_cache_free(scsi_tgt_cmd_cache, tcmd);
++
++ spin_lock_irqsave(q->queue_lock, flags);
++ __blk_put_request(q, rq);
++ spin_unlock_irqrestore(q->queue_lock, flags);
++
++ __scsi_put_command(shost, cmd, &shost->shost_gendev);
++}
++EXPORT_SYMBOL_GPL(scsi_host_put_command);
++
++static void scsi_unmap_user_pages(struct scsi_tgt_cmd *tcmd)
++{
++ struct bio *bio;
++
++ /* must call bio_endio in case bio was bounced */
++ while ((bio = bio_list_pop(&tcmd->xfer_done_list))) {
++ bio_endio(bio, bio->bi_size, 0);
++ bio_unmap_user(bio);
++ }
++
++ while ((bio = bio_list_pop(&tcmd->xfer_list))) {
++ bio_endio(bio, bio->bi_size, 0);
++ bio_unmap_user(bio);
++ }
++}
++
++static void cmd_hashlist_del(struct scsi_cmnd *cmd)
++{
++ struct request_queue *q = cmd->request->q;
++ struct scsi_tgt_queuedata *qdata = q->queuedata;
++ unsigned long flags;
++ struct scsi_tgt_cmd *tcmd = cmd->request->end_io_data;
++
++ spin_lock_irqsave(&qdata->cmd_hash_lock, flags);
++ list_del(&tcmd->hash_list);
++ spin_unlock_irqrestore(&qdata->cmd_hash_lock, flags);
++}
++
++static void scsi_tgt_cmd_destroy(void *data)
++{
++ struct scsi_cmnd *cmd = data;
++ struct scsi_tgt_cmd *tcmd = cmd->request->end_io_data;
++
++ dprintk("cmd %p %d %lu\n", cmd, cmd->sc_data_direction,
++ rq_data_dir(cmd->request));
++ /*
++ * We fix rq->flags here since when we told bio_map_user
++ * to write vm for WRITE commands, blk_rq_bio_prep set
++ * the data direction flags to READ.
++ */
++ if (cmd->sc_data_direction == DMA_TO_DEVICE)
++ cmd->request->flags |= REQ_RW;
++ else
++ cmd->request->flags &= ~REQ_RW;
++
++ scsi_unmap_user_pages(tcmd);
++ scsi_host_put_command(scsi_tgt_cmd_to_host(cmd), cmd);
++}
++
++static void init_scsi_tgt_cmd(struct request *rq, struct scsi_tgt_cmd *tcmd,
++ u64 tag)
++{
++ struct scsi_tgt_queuedata *qdata = rq->q->queuedata;
++ unsigned long flags;
++ struct list_head *head;
++
++ tcmd->tag = tag;
++ spin_lock_irqsave(&qdata->cmd_hash_lock, flags);
++ head = &qdata->cmd_hash[cmd_hashfn(tag)];
++ list_add(&tcmd->hash_list, head);
++ spin_unlock_irqrestore(&qdata->cmd_hash_lock, flags);
++}
++
++/*
++ * scsi_tgt_alloc_queue - setup queue used for message passing
++ * shost: scsi host
++ *
++ * This should be called by the LLD after host allocation.
++ * And will be released when the host is released.
++ */
++int scsi_tgt_alloc_queue(struct Scsi_Host *shost)
++{
++ struct scsi_tgt_queuedata *queuedata;
++ struct request_queue *q;
++ int err, i;
++
++ /*
++ * Do we need to send a netlink event or should uspace
++ * just respond to the hotplug event?
++ */
++ q = __scsi_alloc_queue(shost, NULL);
++ if (!q)
++ return -ENOMEM;
++
++ queuedata = kzalloc(sizeof(*queuedata), GFP_KERNEL);
++ if (!queuedata) {
++ err = -ENOMEM;
++ goto cleanup_queue;
++ }
++ queuedata->shost = shost;
++ q->queuedata = queuedata;
++
++ /*
++ * this is a silly hack. We should probably just queue as many
++ * commands as are received to userspace. uspace can then make
++ * sure we do not overload the HBA
++ */
++ q->nr_requests = shost->hostt->can_queue;
++ /*
++ * We currently only support software LLDs so this does
++ * not matter for now. Do we need this for the cards we support?
++ * If so we should make it a host template value.
++ */
++ blk_queue_dma_alignment(q, 0);
++ shost->uspace_req_q = q;
++
++ for (i = 0; i < ARRAY_SIZE(queuedata->cmd_hash); i++)
++ INIT_LIST_HEAD(&queuedata->cmd_hash[i]);
++ spin_lock_init(&queuedata->cmd_hash_lock);
++
++ return 0;
++
++cleanup_queue:
++ blk_cleanup_queue(q);
++ return err;
++}
++EXPORT_SYMBOL_GPL(scsi_tgt_alloc_queue);
++
++void scsi_tgt_free_queue(struct Scsi_Host *shost)
++{
++ int i;
++ unsigned long flags;
++ struct request_queue *q = shost->uspace_req_q;
++ struct scsi_cmnd *cmd;
++ struct scsi_tgt_queuedata *qdata = q->queuedata;
++ struct scsi_tgt_cmd *tcmd, *n;
++ LIST_HEAD(cmds);
++
++ spin_lock_irqsave(&qdata->cmd_hash_lock, flags);
++
++ for (i = 0; i < ARRAY_SIZE(qdata->cmd_hash); i++) {
++ list_for_each_entry_safe(tcmd, n, &qdata->cmd_hash[i],
++ hash_list) {
++ list_del(&tcmd->hash_list);
++ list_add(&tcmd->hash_list, &cmds);
++ }
++ }
++
++ spin_unlock_irqrestore(&qdata->cmd_hash_lock, flags);
++
++ while (!list_empty(&cmds)) {
++ tcmd = list_entry(cmds.next, struct scsi_tgt_cmd, hash_list);
++ list_del(&tcmd->hash_list);
++ cmd = tcmd->rq->special;
++
++ shost->hostt->eh_abort_handler(cmd);
++ scsi_tgt_cmd_destroy(cmd);
++ }
++}
++EXPORT_SYMBOL_GPL(scsi_tgt_free_queue);
++
++struct Scsi_Host *scsi_tgt_cmd_to_host(struct scsi_cmnd *cmd)
++{
++ struct scsi_tgt_queuedata *queue = cmd->request->q->queuedata;
++ return queue->shost;
++}
++EXPORT_SYMBOL_GPL(scsi_tgt_cmd_to_host);
++
++/*
++ * scsi_tgt_queue_command - queue command for userspace processing
++ * @cmd: scsi command
++ * @scsilun: scsi lun
++ * @tag: unique value to identify this command for tmf
++ */
++int scsi_tgt_queue_command(struct scsi_cmnd *cmd, struct scsi_lun *scsilun,
++ u64 tag)
++{
++ struct scsi_tgt_cmd *tcmd = cmd->request->end_io_data;
++ int err;
++
++ init_scsi_tgt_cmd(cmd->request, tcmd, tag);
++ err = scsi_tgt_uspace_send_cmd(cmd, scsilun, tag);
++ if (err)
++ cmd_hashlist_del(cmd);
++
++ return err;
++}
++EXPORT_SYMBOL_GPL(scsi_tgt_queue_command);
++
++/*
++ * This is normally run from an interrupt handler and the unmap
++ * needs process context, so we must queue
++ */
++static void scsi_tgt_cmd_done(struct scsi_cmnd *cmd)
++{
++ struct scsi_tgt_cmd *tcmd = cmd->request->end_io_data;
++
++ dprintk("cmd %p %lu\n", cmd, rq_data_dir(cmd->request));
++
++ scsi_tgt_uspace_send_status(cmd, tcmd->tag);
++ INIT_WORK(&tcmd->work, scsi_tgt_cmd_destroy, cmd);
++ queue_work(scsi_tgtd, &tcmd->work);
++}
++
++static int __scsi_tgt_transfer_response(struct scsi_cmnd *cmd)
++{
++ struct Scsi_Host *shost = scsi_tgt_cmd_to_host(cmd);
++ int err;
++
++ dprintk("cmd %p %lu\n", cmd, rq_data_dir(cmd->request));
++
++ err = shost->hostt->transfer_response(cmd, scsi_tgt_cmd_done);
++ switch (err) {
++ case SCSI_MLQUEUE_HOST_BUSY:
++ case SCSI_MLQUEUE_DEVICE_BUSY:
++ return -EAGAIN;
++ }
++
++ return 0;
++}
++
++static void scsi_tgt_transfer_response(struct scsi_cmnd *cmd)
++{
++ struct scsi_tgt_cmd *tcmd = cmd->request->end_io_data;
++ int err;
++
++ err = __scsi_tgt_transfer_response(cmd);
++ if (!err)
++ return;
++
++ cmd->result = DID_BUS_BUSY << 16;
++ err = scsi_tgt_uspace_send_status(cmd, tcmd->tag);
++ if (err <= 0)
++ /* the eh will have to pick this up */
++ printk(KERN_ERR "Could not send cmd %p status\n", cmd);
++}
++
++static int scsi_tgt_init_cmd(struct scsi_cmnd *cmd, gfp_t gfp_mask)
++{
++ struct request *rq = cmd->request;
++ struct scsi_tgt_cmd *tcmd = rq->end_io_data;
++ int count;
++
++ cmd->use_sg = rq->nr_phys_segments;
++ cmd->request_buffer = scsi_alloc_sgtable(cmd, gfp_mask);
++ if (!cmd->request_buffer)
++ return -ENOMEM;
++
++ cmd->request_bufflen = rq->data_len;
++
++ dprintk("cmd %p addr %p cnt %d %lu\n", cmd, tcmd->buffer, cmd->use_sg,
++ rq_data_dir(rq));
++ count = blk_rq_map_sg(rq->q, rq, cmd->request_buffer);
++ if (likely(count <= cmd->use_sg)) {
++ cmd->use_sg = count;
++ return 0;
++ }
++
++ eprintk("cmd %p addr %p cnt %d\n", cmd, tcmd->buffer, cmd->use_sg);
++ scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len);
++ return -EINVAL;
++}
++
++/* TODO: test this crap and replace bio_map_user with new interface maybe */
++static int scsi_map_user_pages(struct scsi_tgt_cmd *tcmd, struct scsi_cmnd *cmd,
++ int rw)
++{
++ struct request_queue *q = cmd->request->q;
++ struct request *rq = cmd->request;
++ void *uaddr = tcmd->buffer;
++ unsigned int len = tcmd->bufflen;
++ struct bio *bio;
++ int err;
++
++ while (len > 0) {
++ dprintk("%lx %u\n", (unsigned long) uaddr, len);
++ bio = bio_map_user(q, NULL, (unsigned long) uaddr, len, rw);
++ if (IS_ERR(bio)) {
++ err = PTR_ERR(bio);
++ dprintk("fail to map %lx %u %d %x\n",
++ (unsigned long) uaddr, len, err, cmd->cmnd[0]);
++ goto unmap_bios;
++ }
++
++ uaddr += bio->bi_size;
++ len -= bio->bi_size;
++
++ /*
++ * The first bio is added and merged. We could probably
++ * try to add others using scsi_merge_bio() but for now
++ * we keep it simple. The first bio should be pretty large
++ * (either hitting the 1 MB bio pages limit or a queue limit)
++ * already but for really large IO we may want to try and
++ * merge these.
++ */
++ if (!rq->bio) {
++ blk_rq_bio_prep(q, rq, bio);
++ rq->data_len = bio->bi_size;
++ } else
++ /* put list of bios to transfer in next go around */
++ bio_list_add(&tcmd->xfer_list, bio);
++ }
++
++ cmd->offset = 0;
++ err = scsi_tgt_init_cmd(cmd, GFP_KERNEL);
++ if (err)
++ goto unmap_bios;
++
++ return 0;
++
++unmap_bios:
++ if (rq->bio) {
++ bio_unmap_user(rq->bio);
++ while ((bio = bio_list_pop(&tcmd->xfer_list)))
++ bio_unmap_user(bio);
++ }
++
++ return err;
++}
++
++static int scsi_tgt_transfer_data(struct scsi_cmnd *);
++
++static void scsi_tgt_data_transfer_done(struct scsi_cmnd *cmd)
++{
++ struct scsi_tgt_cmd *tcmd = cmd->request->end_io_data;
++ struct bio *bio;
++ int err;
++
++ /* should we free resources here on error ? */
++ if (cmd->result) {
++send_uspace_err:
++ err = scsi_tgt_uspace_send_status(cmd, tcmd->tag);
++ if (err <= 0)
++ /* the tgt uspace eh will have to pick this up */
++ printk(KERN_ERR "Could not send cmd %p status\n", cmd);
++ return;
++ }
++
++ dprintk("cmd %p request_bufflen %u bufflen %u\n",
++ cmd, cmd->request_bufflen, tcmd->bufflen);
++
++ scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len);
++ bio_list_add(&tcmd->xfer_done_list, cmd->request->bio);
++
++ tcmd->buffer += cmd->request_bufflen;
++ cmd->offset += cmd->request_bufflen;
++
++ if (!tcmd->xfer_list.head) {
++ scsi_tgt_transfer_response(cmd);
++ return;
++ }
++
++ dprintk("cmd2 %p request_bufflen %u bufflen %u\n",
++ cmd, cmd->request_bufflen, tcmd->bufflen);
++
++ bio = bio_list_pop(&tcmd->xfer_list);
++ BUG_ON(!bio);
++
++ blk_rq_bio_prep(cmd->request->q, cmd->request, bio);
++ cmd->request->data_len = bio->bi_size;
++ err = scsi_tgt_init_cmd(cmd, GFP_ATOMIC);
++ if (err) {
++ cmd->result = DID_ERROR << 16;
++ goto send_uspace_err;
++ }
++
++ if (scsi_tgt_transfer_data(cmd)) {
++ cmd->result = DID_NO_CONNECT << 16;
++ goto send_uspace_err;
++ }
++}
++
++static int scsi_tgt_transfer_data(struct scsi_cmnd *cmd)
++{
++ int err;
++ struct Scsi_Host *host = scsi_tgt_cmd_to_host(cmd);
++
++ err = host->hostt->transfer_data(cmd, scsi_tgt_data_transfer_done);
++ switch (err) {
++ case SCSI_MLQUEUE_HOST_BUSY:
++ case SCSI_MLQUEUE_DEVICE_BUSY:
++ return -EAGAIN;
++ default:
++ return 0;
++ }
++}
++
++static int scsi_tgt_copy_sense(struct scsi_cmnd *cmd, unsigned long uaddr,
++ unsigned len)
++{
++ char __user *p = (char __user *) uaddr;
++
++ if (copy_from_user(cmd->sense_buffer, p,
++ min_t(unsigned, SCSI_SENSE_BUFFERSIZE, len))) {
++ printk(KERN_ERR "Could not copy the sense buffer\n");
++ return -EIO;
++ }
++ return 0;
++}
++
++static int scsi_tgt_abort_cmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
++{
++ int err;
++
++ err = shost->hostt->eh_abort_handler(cmd);
++ if (err)
++ eprintk("fail to abort %p\n", cmd);
++
++ scsi_tgt_cmd_destroy(cmd);
++ return err;
++}
++
++static struct request *tgt_cmd_hash_lookup(struct request_queue *q, u64 tag)
++{
++ struct scsi_tgt_queuedata *qdata = q->queuedata;
++ struct request *rq = NULL;
++ struct list_head *head;
++ struct scsi_tgt_cmd *tcmd;
++ unsigned long flags;
++
++ head = &qdata->cmd_hash[cmd_hashfn(tag)];
++ spin_lock_irqsave(&qdata->cmd_hash_lock, flags);
++ list_for_each_entry(tcmd, head, hash_list) {
++ if (tcmd->tag == tag) {
++ rq = tcmd->rq;
++ list_del(&tcmd->hash_list);
++ break;
++ }
++ }
++ spin_unlock_irqrestore(&qdata->cmd_hash_lock, flags);
++
++ return rq;
++}
++
++int scsi_tgt_kspace_exec(int host_no, u64 tag, int result, u32 len,
++ unsigned long uaddr, u8 rw)
++{
++ struct Scsi_Host *shost;
++ struct scsi_cmnd *cmd;
++ struct request *rq;
++ struct scsi_tgt_cmd *tcmd;
++ int err = 0;
++
++ dprintk("%d %llu %d %u %lx %u\n", host_no, (unsigned long long) tag,
++ result, len, uaddr, rw);
++
++ /* TODO: replace with an O(1) alg */
++ shost = scsi_host_lookup(host_no);
++ if (IS_ERR(shost)) {
++ printk(KERN_ERR "Could not find host no %d\n", host_no);
++ return -EINVAL;
++ }
++
++ if (!shost->uspace_req_q) {
++ printk(KERN_ERR "Not target scsi host %d\n", host_no);
++ goto done;
++ }
++
++ rq = tgt_cmd_hash_lookup(shost->uspace_req_q, tag);
++ if (!rq) {
++ printk(KERN_ERR "Could not find tag %llu\n",
++ (unsigned long long) tag);
++ err = -EINVAL;
++ goto done;
++ }
++ cmd = rq->special;
++
++ dprintk("cmd %p result %d len %d bufflen %u %lu %x\n", cmd,
++ result, len, cmd->request_bufflen, rq_data_dir(rq), cmd->cmnd[0]);
++
++ if (result == TASK_ABORTED) {
++ scsi_tgt_abort_cmd(shost, cmd);
++ goto done;
++ }
++ /*
++ * store the userspace values here, the working values are
++ * in the request_* values
++ */
++ tcmd = cmd->request->end_io_data;
++ tcmd->buffer = (void *)uaddr;
++ tcmd->bufflen = len;
++ cmd->result = result;
++
++ if (!tcmd->bufflen || cmd->request_buffer) {
++ err = __scsi_tgt_transfer_response(cmd);
++ goto done;
++ }
++
++ /*
++ * TODO: Do we need to handle case where request does not
++ * align with LLD.
++ */
++ err = scsi_map_user_pages(rq->end_io_data, cmd, rw);
++ if (err) {
++ eprintk("%p %d\n", cmd, err);
++ err = -EAGAIN;
++ goto done;
++ }
++
++ /* userspace failure */
++ if (cmd->result) {
++ if (status_byte(cmd->result) == CHECK_CONDITION)
++ scsi_tgt_copy_sense(cmd, uaddr, len);
++ err = __scsi_tgt_transfer_response(cmd);
++ goto done;
++ }
++ /* ask the target LLD to transfer the data to the buffer */
++ err = scsi_tgt_transfer_data(cmd);
++
++done:
++ scsi_host_put(shost);
++ return err;
++}
++
++int scsi_tgt_tsk_mgmt_request(struct Scsi_Host *shost, int function, u64 tag,
++ struct scsi_lun *scsilun, void *data)
++{
++ int err;
++
++ /* TODO: need to retry if this fails. */
++ err = scsi_tgt_uspace_send_tsk_mgmt(shost->host_no, function,
++ tag, scsilun, data);
++ if (err < 0)
++ eprintk("The task management request lost!\n");
++ return err;
++}
++EXPORT_SYMBOL_GPL(scsi_tgt_tsk_mgmt_request);
++
++int scsi_tgt_kspace_tsk_mgmt(int host_no, u64 mid, int result)
++{
++ struct Scsi_Host *shost;
++ int err = -EINVAL;
++
++ dprintk("%d %d %llx\n", host_no, result, (unsigned long long) mid);
++
++ shost = scsi_host_lookup(host_no);
++ if (IS_ERR(shost)) {
++ printk(KERN_ERR "Could not find host no %d\n", host_no);
++ return err;
++ }
++
++ if (!shost->uspace_req_q) {
++ printk(KERN_ERR "Not target scsi host %d\n", host_no);
++ goto done;
++ }
++
++ err = shost->hostt->tsk_mgmt_response(mid, result);
++done:
++ scsi_host_put(shost);
++ return err;
++}
++
++static int __init scsi_tgt_init(void)
++{
++ int err;
++
++ scsi_tgt_cmd_cache = kmem_cache_create("scsi_tgt_cmd",
++ sizeof(struct scsi_tgt_cmd),
++ 0, 0, NULL, NULL);
++ if (!scsi_tgt_cmd_cache)
++ return -ENOMEM;
++
++ scsi_tgtd = create_workqueue("scsi_tgtd");
++ if (!scsi_tgtd) {
++ err = -ENOMEM;
++ goto free_kmemcache;
++ }
++
++ err = scsi_tgt_if_init();
++ if (err)
++ goto destroy_wq;
++
++ return 0;
++
++destroy_wq:
++ destroy_workqueue(scsi_tgtd);
++free_kmemcache:
++ kmem_cache_destroy(scsi_tgt_cmd_cache);
++ return err;
++}
++
++static void __exit scsi_tgt_exit(void)
++{
++ destroy_workqueue(scsi_tgtd);
++ scsi_tgt_if_exit();
++ kmem_cache_destroy(scsi_tgt_cmd_cache);
++}
++
++module_init(scsi_tgt_init);
++module_exit(scsi_tgt_exit);
++
++MODULE_DESCRIPTION("SCSI target core");
++MODULE_LICENSE("GPL");
+diff --git a/drivers/scsi/scsi_tgt_priv.h b/drivers/scsi/scsi_tgt_priv.h
+new file mode 100644
+index 0000000..84488c5
+--- /dev/null
++++ b/drivers/scsi/scsi_tgt_priv.h
+@@ -0,0 +1,25 @@
++struct scsi_cmnd;
++struct scsi_lun;
++struct Scsi_Host;
++struct task_struct;
++
++/* tmp - will replace with SCSI logging stuff */
++#define eprintk(fmt, args...) \
++do { \
++ printk("%s(%d) " fmt, __FUNCTION__, __LINE__, ##args); \
++} while (0)
++
++#define dprintk(fmt, args...)
++/* #define dprintk eprintk */
++
++extern void scsi_tgt_if_exit(void);
++extern int scsi_tgt_if_init(void);
++
++extern int scsi_tgt_uspace_send_cmd(struct scsi_cmnd *cmd, struct scsi_lun *lun,
++ u64 tag);
++extern int scsi_tgt_uspace_send_status(struct scsi_cmnd *cmd, u64 tag);
++extern int scsi_tgt_kspace_exec(int host_no, u64 tag, int result, u32 len,
++ unsigned long uaddr, u8 rw);
++extern int scsi_tgt_uspace_send_tsk_mgmt(int host_no, int function, u64 tag,
++ struct scsi_lun *scsilun, void *data);
++extern int scsi_tgt_kspace_tsk_mgmt(int host_no, u64 mid, int result);
+diff --git a/fs/bio.c b/fs/bio.c
+index 0cba08f..59f4c02 100644
+--- a/fs/bio.c
++++ b/fs/bio.c
+@@ -558,10 +558,8 @@ struct bio *bio_copy_user(request_queue_
+ break;
+ }
+
+- if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
+- ret = -EINVAL;
++ if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
+ break;
+- }
+
+ len -= bytes;
+ }
+@@ -620,10 +618,9 @@ static struct bio *__bio_map_user_iov(re
+
+ nr_pages += end - start;
+ /*
+- * transfer and buffer must be aligned to at least hardsector
+- * size for now, in the future we can relax this restriction
++ * buffer must be aligned to at least hardsector size for now
+ */
+- if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
++ if (uaddr & queue_dma_alignment(q))
+ return ERR_PTR(-EINVAL);
+ }
+
+@@ -751,7 +748,6 @@ struct bio *bio_map_user_iov(request_que
+ int write_to_vm)
+ {
+ struct bio *bio;
+- int len = 0, i;
+
+ bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm);
+
+@@ -766,18 +762,7 @@ struct bio *bio_map_user_iov(request_que
+ */
+ bio_get(bio);
+
+- for (i = 0; i < iov_count; i++)
+- len += iov[i].iov_len;
+-
+- if (bio->bi_size == len)
+- return bio;
+-
+- /*
+- * don't support partial mappings
+- */
+- bio_endio(bio, bio->bi_size, 0);
+- bio_unmap_user(bio);
+- return ERR_PTR(-EINVAL);
++ return bio;
+ }
+
+ static void __bio_unmap_user(struct bio *bio)
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index 860e7a4..45c007d 100644
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -608,10 +608,11 @@ extern void blk_sync_queue(struct reques
+ extern void __blk_stop_queue(request_queue_t *q);
+ extern void blk_run_queue(request_queue_t *);
+ extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
+-extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int);
+-extern int blk_rq_unmap_user(struct bio *, unsigned int);
++extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned long);
++extern int blk_rq_unmap_user(struct request *);
+ extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, gfp_t);
+-extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int);
++extern int blk_rq_map_user_iov(request_queue_t *, struct request *,
++ struct sg_iovec *, int, unsigned int);
+ extern int blk_execute_rq(request_queue_t *, struct gendisk *,
+ struct request *, int);
+ extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *,
+diff --git a/include/scsi/libsrp.h b/include/scsi/libsrp.h
+new file mode 100644
+index 0000000..d143171
+--- /dev/null
++++ b/include/scsi/libsrp.h
+@@ -0,0 +1,77 @@
++#ifndef __LIBSRP_H__
++#define __LIBSRP_H__
++
++#include <linux/list.h>
++#include <scsi/scsi_cmnd.h>
++#include <scsi/scsi_host.h>
++#include <scsi/srp.h>
++
++enum iue_flags {
++ V_DIOVER,
++ V_WRITE,
++ V_LINKED,
++ V_FLYING,
++};
++
++struct srp_buf {
++ dma_addr_t dma;
++ void *buf;
++};
++
++struct srp_queue {
++ void *pool;
++ void *items;
++ struct kfifo *queue;
++ spinlock_t lock;
++};
++
++struct srp_target {
++ struct Scsi_Host *shost;
++ struct device *dev;
++
++ spinlock_t lock;
++ struct list_head cmd_queue;
++
++ size_t srp_iu_size;
++ struct srp_queue iu_queue;
++ size_t rx_ring_size;
++ struct srp_buf **rx_ring;
++
++ void *ldata;
++};
++
++struct iu_entry {
++ struct srp_target *target;
++
++ struct list_head ilist;
++ dma_addr_t remote_token;
++ unsigned long flags;
++
++ struct srp_buf *sbuf;
++};
++
++typedef int (srp_rdma_t)(struct scsi_cmnd *, struct scatterlist *, int,
++ struct srp_direct_buf *, int,
++ enum dma_data_direction, unsigned int);
++extern int srp_target_alloc(struct srp_target *, struct device *, size_t, size_t);
++extern void srp_target_free(struct srp_target *);
++
++extern struct iu_entry *srp_iu_get(struct srp_target *);
++extern void srp_iu_put(struct iu_entry *);
++
++extern int srp_cmd_queue(struct Scsi_Host *, struct srp_cmd *, void *, u64);
++extern int srp_transfer_data(struct scsi_cmnd *, struct srp_cmd *,
++ srp_rdma_t, int, int);
++
++
++static inline struct srp_target *host_to_srp_target(struct Scsi_Host *host)
++{
++ return (struct srp_target *) host->hostdata;
++}
++
++static inline int srp_cmd_direction(struct srp_cmd *cmd)
++{
++ return (cmd->buf_fmt >> 4) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
++}
++
++#endif
+diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
+index 7529f43..df8990e 100644
+--- a/include/scsi/scsi_cmnd.h
++++ b/include/scsi/scsi_cmnd.h
+@@ -8,6 +8,7 @@ #include <linux/timer.h>
+
+ struct request;
+ struct scatterlist;
++struct Scsi_Host;
+ struct scsi_device;
+ struct scsi_request;
+
+@@ -85,6 +86,9 @@ #define MAX_COMMAND_SIZE 16
+ unsigned bufflen; /* Size of data buffer */
+ void *buffer; /* Data buffer */
+
++ /* offset in cmd we are at (for multi-transfer tgt cmds) */
++ unsigned offset;
++
+ unsigned underflow; /* Return error if less than
+ this amount is transferred */
+ unsigned old_underflow; /* save underflow here when reusing the
+@@ -148,8 +152,14 @@ #define SCSI_STATE_MLQUEUE 0x100
+
+
+ extern struct scsi_cmnd *scsi_get_command(struct scsi_device *, gfp_t);
++extern struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *, gfp_t);
+ extern void scsi_put_command(struct scsi_cmnd *);
++extern void __scsi_put_command(struct Scsi_Host *, struct scsi_cmnd *,
++ struct device *);
+ extern void scsi_io_completion(struct scsi_cmnd *, unsigned int, unsigned int);
+ extern void scsi_finish_command(struct scsi_cmnd *cmd);
+
++extern struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *, gfp_t);
++extern void scsi_free_sgtable(struct scatterlist *, int);
++
+ #endif /* _SCSI_SCSI_CMND_H */
+diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
+index 8279929..3e1b6b6 100644
+--- a/include/scsi/scsi_host.h
++++ b/include/scsi/scsi_host.h
+@@ -7,6 +7,7 @@ #include <linux/types.h>
+ #include <linux/workqueue.h>
+ #include <linux/mutex.h>
+
++struct request_queue;
+ struct block_device;
+ struct completion;
+ struct module;
+@@ -123,6 +124,39 @@ #endif
+ void (*done)(struct scsi_cmnd *));
+
+ /*
++ * The transfer functions are used to queue a scsi command to
++ * the LLD. When the driver is finished processing the command
++ * the done callback is invoked.
++ *
++ * return values: see queuecommand
++ *
++ * If the LLD accepts the cmd, it should set the result to an
++ * appropriate value when completed before calling the done function.
++ *
++ * STATUS: REQUIRED FOR TARGET DRIVERS
++ */
++ /* TODO: rename */
++ int (* transfer_response)(struct scsi_cmnd *,
++ void (*done)(struct scsi_cmnd *));
++ /*
++ * This is called to inform the LLD to transfer cmd->request_bufflen
++ * bytes of the cmd at cmd->offset in the cmd. The cmd->use_sg
++ * specifies the number of scatterlist entries in the command
++ * and cmd->request_buffer contains the scatterlist.
++ *
++ * If the command cannot be processed in one transfer_data call
++ * because a scatterlist within the LLD's limits cannot be
++ * created then transfer_data will be called multiple times.
++ * It is initially called from process context, and later
++ * calls are from the interrupt context.
++ */
++ int (* transfer_data)(struct scsi_cmnd *,
++ void (*done)(struct scsi_cmnd *));
++
++ /* Used as callback for the completion of task management request. */
++ int (* tsk_mgmt_response)(u64 mid, int result);
++
++ /*
+ * This is an error handling strategy routine. You don't need to
+ * define one of these if you don't want to - there is a default
+ * routine that is present that should work in most cases. For those
+@@ -572,6 +606,12 @@ struct Scsi_Host {
+ */
+ unsigned int max_host_blocked;
+
++ /*
++ * q used for scsi_tgt msgs, async events or any other requests that
++ * need to be processed in userspace
++ */
++ struct request_queue *uspace_req_q;
++
+ /* legacy crap */
+ unsigned long base;
+ unsigned long io_port;
+@@ -674,6 +714,9 @@ extern void scsi_unblock_requests(struct
+ extern void scsi_block_requests(struct Scsi_Host *);
+
+ struct class_container;
++
++extern struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
++ void (*) (struct request_queue *));
+ /*
+ * These two functions are used to allocate and free a pseudo device
+ * which will connect to the host adapter itself rather than any
+diff --git a/include/scsi/scsi_tgt.h b/include/scsi/scsi_tgt.h
+new file mode 100644
+index 0000000..4f44279
+--- /dev/null
++++ b/include/scsi/scsi_tgt.h
+@@ -0,0 +1,19 @@
++/*
++ * SCSI target definitions
++ */
++
++#include <linux/dma-mapping.h>
++
++struct Scsi_Host;
++struct scsi_cmnd;
++struct scsi_lun;
++
++extern struct Scsi_Host *scsi_tgt_cmd_to_host(struct scsi_cmnd *);
++extern int scsi_tgt_alloc_queue(struct Scsi_Host *);
++extern void scsi_tgt_free_queue(struct Scsi_Host *);
++extern int scsi_tgt_queue_command(struct scsi_cmnd *, struct scsi_lun *, u64);
++extern int scsi_tgt_tsk_mgmt_request(struct Scsi_Host *, int, u64, struct scsi_lun *,
++ void *);
++extern struct scsi_cmnd *scsi_host_get_command(struct Scsi_Host *,
++ enum dma_data_direction, gfp_t);
++extern void scsi_host_put_command(struct Scsi_Host *, struct scsi_cmnd *);
+diff --git a/include/scsi/scsi_tgt_if.h b/include/scsi/scsi_tgt_if.h
+new file mode 100644
+index 0000000..4aad0fe
+--- /dev/null
++++ b/include/scsi/scsi_tgt_if.h
+@@ -0,0 +1,91 @@
++/*
++ * SCSI target kernel/user interface
++ *
++ * Copyright (C) 2005 FUJITA Tomonori <tomof@xxxxxxx>
++ * Copyright (C) 2005 Mike Christie <michaelc@xxxxxxxxxxx>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License as
++ * published by the Free Software Foundation; either version 2 of the
++ * License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
++ * 02110-1301 USA
++ */
++#ifndef __SCSI_TARGET_IF_H
++#define __SCSI_TARGET_IF_H
++
++/* user -> kernel */
++#define TGT_UEVENT_CMD_RSP 0x0001
++#define TGT_UEVENT_TSK_MGMT_RSP 0x0002
++
++/* kernel -> user */
++#define TGT_KEVENT_CMD_REQ 0x1001
++#define TGT_KEVENT_CMD_DONE 0x1002
++#define TGT_KEVENT_TSK_MGMT_REQ 0x1003
++
++struct tgt_event_hdr {
++ uint16_t version;
++ uint16_t status;
++ uint16_t type;
++ uint16_t len;
++} __attribute__ ((aligned (sizeof(uint64_t))));
++
++struct tgt_event {
++ struct tgt_event_hdr hdr;
++
++ union {
++ /* user-> kernel */
++ struct {
++ int host_no;
++ uint32_t len;
++ int result;
++ aligned_u64 uaddr;
++ uint8_t rw;
++ aligned_u64 tag;
++ } cmd_rsp;
++ struct {
++ int host_no;
++ aligned_u64 mid;
++ int result;
++ } tsk_mgmt_rsp;
++
++
++ /* kernel -> user */
++ struct {
++ int host_no;
++ uint32_t data_len;
++ uint8_t scb[16];
++ uint8_t lun[8];
++ int attribute;
++ aligned_u64 tag;
++ aligned_u64 uaddr;
++ } cmd_req;
++ struct {
++ int host_no;
++ aligned_u64 tag;
++ int result;
++ } cmd_done;
++ struct {
++ int host_no;
++ int function;
++ aligned_u64 tag;
++ uint8_t lun[8];
++ aligned_u64 mid;
++ } tsk_mgmt_req;
++ } p;
++} __attribute__ ((aligned (sizeof(uint64_t))));
++
++#define TGT_RING_SIZE (1UL << 16)
++#define TGT_RING_PAGES (TGT_RING_SIZE >> PAGE_SHIFT)
++#define TGT_EVENT_PER_PAGE (PAGE_SIZE / sizeof(struct tgt_event))
++#define TGT_MAX_EVENTS (TGT_EVENT_PER_PAGE * TGT_RING_PAGES)
++
++#endif
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel