|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v2 1/3] vpci: allow queueing of mapping operations
On Thu, Jul 24, 2025 at 03:00:46AM +0000, dmkhn@xxxxxxxxx wrote:
> On Wed, Jul 23, 2025 at 12:37:41PM -0400, Stewart Hildebrand wrote:
> > Introduce vPCI BAR mapping task queue. Decouple map operation state from
> > general vPCI state: in particular, move the per-BAR rangeset out of
> > struct vpci and into the map task struct.
> >
> > This is preparatory work for further changes that need to perform
> > multiple unmap/map operations before returning to guest.
> >
> > Signed-off-by: Stewart Hildebrand <stewart.hildebrand@xxxxxxx>
> > ---
> > v1->v2:
> > * new patch
> >
> > Related: 622bdd962822 ("vpci/header: handle p2m range sets per BAR")
> > ---
> > xen/common/domain.c | 4 +
> > xen/drivers/vpci/header.c | 197 +++++++++++++++++++++++---------------
> > xen/drivers/vpci/vpci.c | 3 -
> > xen/include/xen/vpci.h | 16 +++-
> > 4 files changed, 139 insertions(+), 81 deletions(-)
> >
> > diff --git a/xen/common/domain.c b/xen/common/domain.c
> > index 303c338ef293..214795e2d2fe 100644
> > --- a/xen/common/domain.c
> > +++ b/xen/common/domain.c
> > @@ -459,6 +459,10 @@ struct vcpu *vcpu_create(struct domain *d, unsigned
> > int vcpu_id)
> > d->vcpu[prev_id]->next_in_list = v;
> > }
> >
> > +#ifdef CONFIG_HAS_VPCI
> > + INIT_LIST_HEAD(&v->vpci.task_queue);
> > +#endif
> > +
> > /* Must be called after making new vcpu visible to for_each_vcpu(). */
> > vcpu_check_shutdown(v);
> >
> > diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
> > index bb76e707992c..df065a5f5faf 100644
> > --- a/xen/drivers/vpci/header.c
> > +++ b/xen/drivers/vpci/header.c
> > @@ -34,7 +34,7 @@
> >
> > struct map_data {
> > struct domain *d;
> > - const struct vpci_bar *bar;
> > + const struct vpci_bar_map *bar;
>
> Perhaps s/vpci_bar_map/vpci_bar_mmio/g to highlight the BAR type?
>
> > bool map;
> > };
> >
> > @@ -173,31 +173,23 @@ static void modify_decoding(const struct pci_dev
> > *pdev, uint16_t cmd,
> > ASSERT_UNREACHABLE();
> > }
> >
> > -bool vpci_process_pending(struct vcpu *v)
> > +static bool vpci_process_map_task(struct vpci_map_task *task)
> > {
> > - const struct pci_dev *pdev = v->vpci.pdev;
> > - struct vpci_header *header = NULL;
> > + const struct pci_dev *pdev = task->pdev;
> > unsigned int i;
> >
> > if ( !pdev )
> > return false;
> >
> > - read_lock(&v->domain->pci_lock);
> > -
> > - if ( !pdev->vpci || (v->domain != pdev->domain) )
> > - {
> > - v->vpci.pdev = NULL;
> > - read_unlock(&v->domain->pci_lock);
> > + if ( !pdev->vpci || (task->domain != pdev->domain) )
> > return false;
> > - }
> >
> > - header = &pdev->vpci->header;
> > - for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> > + for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
> > {
> > - struct vpci_bar *bar = &header->bars[i];
> > + struct vpci_bar_map *bar = &task->bars[i];
> > struct map_data data = {
> > - .d = v->domain,
> > - .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
> > + .d = task->domain,
> > + .map = task->cmd & PCI_COMMAND_MEMORY,
> > .bar = bar,
> > };
> > int rc;
> > @@ -208,57 +200,79 @@ bool vpci_process_pending(struct vcpu *v)
> > rc = rangeset_consume_ranges(bar->mem, map_range, &data);
> >
> > if ( rc == -ERESTART )
> > - {
> > - read_unlock(&v->domain->pci_lock);
> > return true;
> > - }
> >
> > if ( rc )
> > {
> > spin_lock(&pdev->vpci->lock);
> > /* Disable memory decoding unconditionally on failure. */
> > - modify_decoding(pdev, v->vpci.cmd & ~PCI_COMMAND_MEMORY,
> > + modify_decoding(pdev, task->cmd & ~PCI_COMMAND_MEMORY,
> > false);
> > spin_unlock(&pdev->vpci->lock);
> >
> > - /* Clean all the rangesets */
> > - for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> > - if ( !rangeset_is_empty(header->bars[i].mem) )
> > - rangeset_purge(header->bars[i].mem);
> > -
> > - v->vpci.pdev = NULL;
> > -
> > - read_unlock(&v->domain->pci_lock);
> > -
> > - if ( !is_hardware_domain(v->domain) )
> > - domain_crash(v->domain);
> > + if ( !is_hardware_domain(task->domain) )
> > + domain_crash(task->domain);
> >
> > return false;
> > }
> > }
> > - v->vpci.pdev = NULL;
> >
> > spin_lock(&pdev->vpci->lock);
> > - modify_decoding(pdev, v->vpci.cmd, v->vpci.rom_only);
> > + modify_decoding(pdev, task->cmd, task->rom_only);
> > spin_unlock(&pdev->vpci->lock);
> >
> > - read_unlock(&v->domain->pci_lock);
> > + return false;
> > +}
> > +
> > +static void destroy_map_task(struct vpci_map_task *task)
> > +{
> > + unsigned int i;
> >
> > + if ( !task )
>
> Looks like task is never NULL in the code, suggest ASSERT().
>
> > + return;
> > +
> > + for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
> > + rangeset_destroy(task->bars[i].mem);
> > +
> > + xfree(task);
> > +}
> > +
> > +bool vpci_process_pending(struct vcpu *v)
> > +{
> > + struct vpci_map_task *task;
> > + read_lock(&v->domain->pci_lock);
> > +
> > + while ( (task = list_first_entry_or_null(&v->vpci.task_queue,
> > + struct vpci_map_task,
> > + next)) != NULL )
> > + {
> > + if ( vpci_process_map_task(task) )
> > + {
> > + read_unlock(&v->domain->pci_lock);
>
> I would add a local variable and use it here for shorter code:
>
> bool rc;
>
> read_lock(&v->domain->pci_lock);
>
> while (...)
> {
> rc = vpci_process_map_task(task);
> if ( rc )
> break;
>
> ...
> }
>
> read_unlock(&v->domain->pci_lock);
>
> return rc;
>
>
>
> > + return true;
> > + }
> > +
> > + list_del(&task->next);
> > + destroy_map_task(task);
> > + }
> > +
> > + read_unlock(&v->domain->pci_lock);
> > return false;
> > }
> >
> > -static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
> > - uint16_t cmd)
> > +static int __init apply_map(struct vpci_map_task *task)
> > {
> > - struct vpci_header *header = &pdev->vpci->header;
> > + struct domain *d = task->domain;
> > + const struct pci_dev *pdev = task->pdev;
> > + uint16_t cmd = task->cmd;
> > int rc = 0;
> > unsigned int i;
> >
> > ASSERT(rw_is_write_locked(&d->pci_lock));
> >
> > - for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> > + for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
> > {
> > - struct vpci_bar *bar = &header->bars[i];
> > + struct vpci_bar_map *bar = &task->bars[i];
> > struct map_data data = { .d = d, .map = true, .bar = bar };
> >
> > if ( rangeset_is_empty(bar->mem) )
> > @@ -283,7 +297,48 @@ static int __init apply_map(struct domain *d, const
> > struct pci_dev *pdev,
> > return rc;
> > }
> >
> > -static void defer_map(const struct pci_dev *pdev, uint16_t cmd, bool
> > rom_only)
> > +static struct vpci_map_task *alloc_map_task(const struct pci_dev *pdev,
> > + uint16_t cmd, bool rom_only)
> > +{
> > + struct vpci_map_task *task = xzalloc(struct vpci_map_task);
> > + unsigned int i;
> > +
>
> I would move the allocation outside of the initializer and use the preferred
> xvzalloc() variant:
>
> task = xvzalloc(struct vpci_map_task);
> > + if ( !task )
> > + return NULL;
> > +
> > + BUILD_BUG_ON(ARRAY_SIZE(task->bars) !=
> > ARRAY_SIZE(pdev->vpci->header.bars));
> > +
> > + for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
> > + {
> > + if ( pdev->vpci->header.bars[i].type >= VPCI_BAR_MEM32 )
>
> I would reduce one level of indentation by
>
> if ( pdev->vpci->header.bars[i].type < VPCI_BAR_MEM32 )
> continue;
>
>
> > + {
> > + char str[32];
> > +
> > + snprintf(str, sizeof(str), "%pp:BAR%u", &pdev->sbdf, i);
> > +
> > + task->bars[i].mem = rangeset_new(pdev->domain, str,
> > + RANGESETF_no_print);
> > +
> > + if ( !task->bars[i].mem )
> > + {
> > + destroy_map_task(task);
>
> What if allocation fails in the middle of the loop, e.g. i == 5?
> I think previously allocated rangesets in this loop should be freed.
Oh, I see that is done in destroy_map_task(); please disregard.
>
> > + return NULL;
> > + }
> > +
> > + task->bars[i].addr = pdev->vpci->header.bars[i].addr;
> > + task->bars[i].guest_addr =
> > pdev->vpci->header.bars[i].guest_addr;
> > + }
> > + }
> > +
> > + task->pdev = pdev;
> > + task->domain = pdev->domain;
> > + task->cmd = cmd;
> > + task->rom_only = rom_only;
> > +
> > + return task;
> > +}
> > +
> > +static void defer_map(struct vpci_map_task *task)
> > {
> > struct vcpu *curr = current;
> >
> > @@ -293,9 +348,9 @@ static void defer_map(const struct pci_dev *pdev,
> > uint16_t cmd, bool rom_only)
> > * is mapped. This can lead to parallel mapping operations being
> > * started for the same device if the domain is not well-behaved.
> > */
> > - curr->vpci.pdev = pdev;
> > - curr->vpci.cmd = cmd;
> > - curr->vpci.rom_only = rom_only;
> > +
> > + list_add_tail(&task->next, &curr->vpci.task_queue);
> > +
> > /*
> > * Raise a scheduler softirq in order to prevent the guest from
> > resuming
> > * execution with pending mapping operations, to trigger the invocation
> > @@ -310,11 +365,15 @@ static int modify_bars(const struct pci_dev *pdev,
> > uint16_t cmd, bool rom_only)
> > struct pci_dev *tmp;
> > const struct domain *d;
> > const struct vpci_msix *msix = pdev->vpci->msix;
> > + struct vpci_map_task *task = alloc_map_task(pdev, cmd, rom_only);
>
> Same comment re: having the allocation on a separate line.
>
> > unsigned int i, j;
> > int rc;
> >
> > ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
> >
> > + if ( !task )
> > + return -ENOMEM;
> > +
> > /*
> > * Create a rangeset per BAR that represents the current device memory
> > * region and compare it against all the currently active BAR memory
> > @@ -330,12 +389,13 @@ static int modify_bars(const struct pci_dev *pdev,
> > uint16_t cmd, bool rom_only)
> > for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> > {
> > struct vpci_bar *bar = &header->bars[i];
> > + struct rangeset *mem = task->bars[i].mem;
> > unsigned long start = PFN_DOWN(bar->addr);
> > unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
> > unsigned long start_guest = PFN_DOWN(bar->guest_addr);
> > unsigned long end_guest = PFN_DOWN(bar->guest_addr + bar->size -
> > 1);
> >
> > - if ( !bar->mem )
> > + if ( !mem )
> > continue;
> >
> > if ( !MAPPABLE_BAR(bar) ||
> > @@ -353,7 +413,7 @@ static int modify_bars(const struct pci_dev *pdev,
> > uint16_t cmd, bool rom_only)
> > continue;
> > }
> >
> > - ASSERT(rangeset_is_empty(bar->mem));
> > + ASSERT(rangeset_is_empty(mem));
> >
> > /*
> > * Make sure that the guest set address has the same page offset
> > @@ -365,21 +425,23 @@ static int modify_bars(const struct pci_dev *pdev,
> > uint16_t cmd, bool rom_only)
> > gprintk(XENLOG_G_WARNING,
> > "%pp: can't map BAR%u - offset mismatch: %#lx vs
> > %#lx\n",
> > &pdev->sbdf, i, bar->guest_addr, bar->addr);
> > + destroy_map_task(task);
>
> I think using goto would be justified for doing cleanup in one place.
>
> > return -EINVAL;
> > }
> >
> > - rc = rangeset_add_range(bar->mem, start_guest, end_guest);
> > + rc = rangeset_add_range(mem, start_guest, end_guest);
> > if ( rc )
> > {
> > printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
> > start_guest, end_guest, rc);
> > + destroy_map_task(task);
> > return rc;
> > }
> >
> > /* Check for overlap with the already setup BAR ranges. */
> > for ( j = 0; j < i; j++ )
> > {
> > - struct vpci_bar *prev_bar = &header->bars[j];
> > + struct vpci_bar_map *prev_bar = &task->bars[j];
> >
> > if ( rangeset_is_empty(prev_bar->mem) )
> > continue;
> > @@ -390,16 +452,18 @@ static int modify_bars(const struct pci_dev *pdev,
> > uint16_t cmd, bool rom_only)
> > gprintk(XENLOG_WARNING,
> > "%pp: failed to remove overlapping range [%lx,
> > %lx]: %d\n",
> > &pdev->sbdf, start_guest, end_guest, rc);
> > + destroy_map_task(task);
> > return rc;
> > }
> > }
> >
> > - rc = pci_sanitize_bar_memory(bar->mem);
> > + rc = pci_sanitize_bar_memory(mem);
> > if ( rc )
> > {
> > gprintk(XENLOG_WARNING,
> > "%pp: failed to sanitize BAR#%u memory: %d\n",
> > &pdev->sbdf, i, rc);
> > + destroy_map_task(task);
> > return rc;
> > }
> > }
> > @@ -413,7 +477,7 @@ static int modify_bars(const struct pci_dev *pdev,
> > uint16_t cmd, bool rom_only)
> >
> > for ( j = 0; j < ARRAY_SIZE(header->bars); j++ )
> > {
> > - const struct vpci_bar *bar = &header->bars[j];
> > + const struct vpci_bar_map *bar = &task->bars[j];
> >
> > if ( rangeset_is_empty(bar->mem) )
> > continue;
> > @@ -424,6 +488,7 @@ static int modify_bars(const struct pci_dev *pdev,
> > uint16_t cmd, bool rom_only)
> > gprintk(XENLOG_WARNING,
> > "%pp: failed to remove MSIX table [%lx, %lx]: %d\n",
> > &pdev->sbdf, start, end, rc);
> > + destroy_map_task(task);
> > return rc;
> > }
> > }
> > @@ -468,8 +533,9 @@ static int modify_bars(const struct pci_dev *pdev,
> > uint16_t cmd, bool rom_only)
> > for ( j = 0; j < ARRAY_SIZE(header->bars); j++)
> > {
> > const struct vpci_bar *bar = &header->bars[j];
> > + struct rangeset *mem = task->bars[j].mem;
> >
> > - if ( !rangeset_overlaps_range(bar->mem, start, end) ||
> > + if ( !rangeset_overlaps_range(mem, start, end) ||
> > /*
> > * If only the ROM enable bit is toggled check
> > against
> > * other BARs in the same device for overlaps,
> > but not
> > @@ -480,12 +546,13 @@ static int modify_bars(const struct pci_dev *pdev,
> > uint16_t cmd, bool rom_only)
> > bar->type == VPCI_BAR_ROM) )
> > continue;
> >
> > - rc = rangeset_remove_range(bar->mem, start, end);
> > + rc = rangeset_remove_range(mem, start, end);
> > if ( rc )
> > {
> > gprintk(XENLOG_WARNING,
> > "%pp: failed to remove [%lx, %lx]: %d\n",
> > &pdev->sbdf, start, end, rc);
> > + destroy_map_task(task);
> > return rc;
> > }
> > }
> > @@ -509,10 +576,12 @@ static int modify_bars(const struct pci_dev *pdev,
> > uint16_t cmd, bool rom_only)
> > * will always be to establish mappings and process all the BARs.
> > */
> > ASSERT((cmd & PCI_COMMAND_MEMORY) && !rom_only);
> > - return apply_map(pdev->domain, pdev, cmd);
> > + rc = apply_map(task);
> > + destroy_map_task(task);
> > + return rc;
> > }
> >
> > - defer_map(pdev, cmd, rom_only);
> > + defer_map(task);
> >
> > return 0;
> > }
> > @@ -731,18 +800,6 @@ static void cf_check rom_write(
> > }
> > }
> >
> > -static int bar_add_rangeset(const struct pci_dev *pdev, struct vpci_bar
> > *bar,
> > - unsigned int i)
> > -{
> > - char str[32];
> > -
> > - snprintf(str, sizeof(str), "%pp:BAR%u", &pdev->sbdf, i);
> > -
> > - bar->mem = rangeset_new(pdev->domain, str, RANGESETF_no_print);
> > -
> > - return !bar->mem ? -ENOMEM : 0;
> > -}
> > -
> > static int vpci_init_capability_list(struct pci_dev *pdev)
> > {
> > int rc;
> > @@ -947,10 +1004,6 @@ static int cf_check init_header(struct pci_dev *pdev)
> > else
> > bars[i].type = VPCI_BAR_MEM32;
> >
> > - rc = bar_add_rangeset(pdev, &bars[i], i);
> > - if ( rc )
> > - goto fail;
> > -
> > rc = pci_size_mem_bar(pdev->sbdf, reg, &addr, &size,
> > (i == num_bars - 1) ? PCI_BAR_LAST : 0);
> > if ( rc < 0 )
> > @@ -1003,12 +1056,6 @@ static int cf_check init_header(struct pci_dev *pdev)
> > 4, rom);
> > if ( rc )
> > rom->type = VPCI_BAR_EMPTY;
> > - else
> > - {
> > - rc = bar_add_rangeset(pdev, rom, num_bars);
> > - if ( rc )
> > - goto fail;
> > - }
> > }
> > else if ( !is_hwdom )
> > {
> > diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
> > index 09988f04c27c..7177cfce355d 100644
> > --- a/xen/drivers/vpci/vpci.c
> > +++ b/xen/drivers/vpci/vpci.c
> > @@ -117,9 +117,6 @@ void vpci_deassign_device(struct pci_dev *pdev)
> > iounmap(pdev->vpci->msix->table[i]);
> > }
> >
> > - for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ )
> > - rangeset_destroy(pdev->vpci->header.bars[i].mem);
> > -
> > xfree(pdev->vpci->msix);
> > xfree(pdev->vpci->msi);
> > xfree(pdev->vpci);
> > diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
> > index 6a481a4e89d3..c2e75076691f 100644
> > --- a/xen/include/xen/vpci.h
> > +++ b/xen/include/xen/vpci.h
> > @@ -103,7 +103,6 @@ struct vpci {
> > uint64_t guest_addr;
> > uint64_t size;
> > uint64_t resizable_sizes;
> > - struct rangeset *mem;
> > enum {
> > VPCI_BAR_EMPTY,
> > VPCI_BAR_IO,
> > @@ -194,14 +193,25 @@ struct vpci {
> > #endif
> > };
> >
> > -struct vpci_vcpu {
> > +#ifdef __XEN__
> > +struct vpci_map_task {
> > /* Per-vcpu structure to store state while {un}mapping of PCI BARs. */
> > + struct list_head next;
> > const struct pci_dev *pdev;
> > + struct domain *domain;
> > + struct vpci_bar_map {
> > + uint64_t addr;
> > + uint64_t guest_addr;
>
> Perhaps use hpa/gpa notation for naming address fields?
>
> > + struct rangeset *mem;
> > + } bars[PCI_HEADER_NORMAL_NR_BARS + 1];
>
> I would add a brief comment for `+ 1` (presumably the extra slot is for the
> ROM BAR, matching the `bar_add_rangeset(pdev, rom, num_bars)` usage being
> removed — worth stating explicitly at the definition).
>
> > uint16_t cmd;
> > bool rom_only : 1;
> > };
> >
> > -#ifdef __XEN__
> > +struct vpci_vcpu {
> > + struct list_head task_queue;
> > +};
> > +
> > void vpci_dump_msi(void);
> >
> > /* Make sure there's a hole in the p2m for the MSIX mmio areas. */
> > --
> > 2.50.1
> >
> >
>
>
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |