Re: [PATCH v2 1/3] vpci: allow queueing of mapping operations
On Thu, Jul 24, 2025 at 03:00:46AM +0000, dmkhn@xxxxxxxxx wrote:
> On Wed, Jul 23, 2025 at 12:37:41PM -0400, Stewart Hildebrand wrote:
> > Introduce vPCI BAR mapping task queue. Decouple map operation state from
> > general vPCI state: in particular, move the per-BAR rangeset out of
> > struct vpci and into the map task struct.
> >
> > This is preparatory work for further changes that need to perform
> > multiple unmap/map operations before returning to guest.
> >
> > Signed-off-by: Stewart Hildebrand <stewart.hildebrand@xxxxxxx>
> > ---
> > v1->v2:
> > * new patch
> >
> > Related: 622bdd962822 ("vpci/header: handle p2m range sets per BAR")
> > ---
> >  xen/common/domain.c       |   4 +
> >  xen/drivers/vpci/header.c | 197 +++++++++++++++++++++++---------------
> >  xen/drivers/vpci/vpci.c   |   3 -
> >  xen/include/xen/vpci.h    |  16 +++-
> >  4 files changed, 139 insertions(+), 81 deletions(-)
> >
> > diff --git a/xen/common/domain.c b/xen/common/domain.c
> > index 303c338ef293..214795e2d2fe 100644
> > --- a/xen/common/domain.c
> > +++ b/xen/common/domain.c
> > @@ -459,6 +459,10 @@ struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
> >          d->vcpu[prev_id]->next_in_list = v;
> >      }
> >
> > +#ifdef CONFIG_HAS_VPCI
> > +    INIT_LIST_HEAD(&v->vpci.task_queue);
> > +#endif
> > +
> >      /* Must be called after making new vcpu visible to for_each_vcpu(). */
> >      vcpu_check_shutdown(v);
> >
> > diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
> > index bb76e707992c..df065a5f5faf 100644
> > --- a/xen/drivers/vpci/header.c
> > +++ b/xen/drivers/vpci/header.c
> > @@ -34,7 +34,7 @@
> >
> >  struct map_data {
> >      struct domain *d;
> > -    const struct vpci_bar *bar;
> > +    const struct vpci_bar_map *bar;
>
> Perhaps s/vpci_bar_map/vpci_bar_mmio/g to highlight the BAR type?
>
> >      bool map;
> >  };
> >
> > @@ -173,31 +173,23 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd,
> >      ASSERT_UNREACHABLE();
> >  }
> >
> > -bool vpci_process_pending(struct vcpu *v)
> > +static bool vpci_process_map_task(struct vpci_map_task *task)
> >  {
> > -    const struct pci_dev *pdev = v->vpci.pdev;
> > -    struct vpci_header *header = NULL;
> > +    const struct pci_dev *pdev = task->pdev;
> >      unsigned int i;
> >
> >      if ( !pdev )
> >          return false;
> >
> > -    read_lock(&v->domain->pci_lock);
> > -
> > -    if ( !pdev->vpci || (v->domain != pdev->domain) )
> > -    {
> > -        v->vpci.pdev = NULL;
> > -        read_unlock(&v->domain->pci_lock);
> > +    if ( !pdev->vpci || (task->domain != pdev->domain) )
> >          return false;
> > -    }
> >
> > -    header = &pdev->vpci->header;
> > -    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> > +    for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
> >      {
> > -        struct vpci_bar *bar = &header->bars[i];
> > +        struct vpci_bar_map *bar = &task->bars[i];
> >          struct map_data data = {
> > -            .d = v->domain,
> > -            .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
> > +            .d = task->domain,
> > +            .map = task->cmd & PCI_COMMAND_MEMORY,
> >              .bar = bar,
> >          };
> >          int rc;
> > @@ -208,57 +200,79 @@ bool vpci_process_pending(struct vcpu *v)
> >          rc = rangeset_consume_ranges(bar->mem, map_range, &data);
> >
> >          if ( rc == -ERESTART )
> > -        {
> > -            read_unlock(&v->domain->pci_lock);
> >              return true;
> > -        }
> >
> >          if ( rc )
> >          {
> >              spin_lock(&pdev->vpci->lock);
> >              /* Disable memory decoding unconditionally on failure. */
> > -            modify_decoding(pdev, v->vpci.cmd & ~PCI_COMMAND_MEMORY,
> > +            modify_decoding(pdev, task->cmd & ~PCI_COMMAND_MEMORY,
> >                              false);
> >              spin_unlock(&pdev->vpci->lock);
> >
> > -            /* Clean all the rangesets */
> > -            for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> > -                if ( !rangeset_is_empty(header->bars[i].mem) )
> > -                    rangeset_purge(header->bars[i].mem);
> > -
> > -            v->vpci.pdev = NULL;
> > -
> > -            read_unlock(&v->domain->pci_lock);
> > -
> > -            if ( !is_hardware_domain(v->domain) )
> > -                domain_crash(v->domain);
> > +            if ( !is_hardware_domain(task->domain) )
> > +                domain_crash(task->domain);
> >
> >              return false;
> >          }
> >      }
> > -    v->vpci.pdev = NULL;
> >
> >      spin_lock(&pdev->vpci->lock);
> > -    modify_decoding(pdev, v->vpci.cmd, v->vpci.rom_only);
> > +    modify_decoding(pdev, task->cmd, task->rom_only);
> >      spin_unlock(&pdev->vpci->lock);
> >
> > -    read_unlock(&v->domain->pci_lock);
> > +    return false;
> > +}
> > +
> > +static void destroy_map_task(struct vpci_map_task *task)
> > +{
> > +    unsigned int i;
> >
> > +    if ( !task )
>
> Looks like task is never NULL in the code, suggest ASSERT().
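
For illustration, the ASSERT() variant might look like this (untested
sketch, not the actual patch):

    static void destroy_map_task(struct vpci_map_task *task)
    {
        unsigned int i;

        /* Callers always pass a non-NULL task, so make that explicit. */
        ASSERT(task);

        /* rangeset_destroy() copes with NULL entries for non-MMIO BARs. */
        for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
            rangeset_destroy(task->bars[i].mem);

        xfree(task);
    }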
> > +        return;
> > +
> > +    for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
> > +        rangeset_destroy(task->bars[i].mem);
> > +
> > +    xfree(task);
> > +}
> > +
> > +bool vpci_process_pending(struct vcpu *v)
> > +{
> > +    struct vpci_map_task *task;
> > +    read_lock(&v->domain->pci_lock);
> > +
> > +    while ( (task = list_first_entry_or_null(&v->vpci.task_queue,
> > +                                             struct vpci_map_task,
> > +                                             next)) != NULL )
> > +    {
> > +        if ( vpci_process_map_task(task) )
> > +        {
> > +            read_unlock(&v->domain->pci_lock);
>
> I would add local variable and use it here for shorter code:
>
>     int rc;
>
>     read_lock(&v->domain->pci_lock);
>
>     while (...)
>     {
>         rc = vpci_process_map_task(task);
>         if ( rc )
>             break;
>
>         ...
>     }
>
>     read_unlock(&v->domain->pci_lock);
>
>     return rc;
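
One nit on that sketch: the loop can also terminate because the queue is
empty, so rc would need an initializer; and since vpci_process_pending()
returns bool, the variable may as well match the return type, e.g.:

    bool rc = false;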
>
> > +            return true;
> > +        }
> > +
> > +        list_del(&task->next);
> > +        destroy_map_task(task);
> > +    }
> > +
> > +    read_unlock(&v->domain->pci_lock);
> >      return false;
> >  }
> >
> > -static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
> > -                            uint16_t cmd)
> > +static int __init apply_map(struct vpci_map_task *task)
> >  {
> > -    struct vpci_header *header = &pdev->vpci->header;
> > +    struct domain *d = task->domain;
> > +    const struct pci_dev *pdev = task->pdev;
> > +    uint16_t cmd = task->cmd;
> >      int rc = 0;
> >      unsigned int i;
> >
> >      ASSERT(rw_is_write_locked(&d->pci_lock));
> >
> > -    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> > +    for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
> >      {
> > -        struct vpci_bar *bar = &header->bars[i];
> > +        struct vpci_bar_map *bar = &task->bars[i];
> >          struct map_data data = { .d = d, .map = true, .bar = bar };
> >
> >          if ( rangeset_is_empty(bar->mem) )
> > @@ -283,7 +297,48 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
> >      return rc;
> >  }
> >
> > -static void defer_map(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
> > +static struct vpci_map_task *alloc_map_task(const struct pci_dev *pdev,
> > +                                            uint16_t cmd, bool rom_only)
> > +{
> > +    struct vpci_map_task *task = xzalloc(struct vpci_map_task);
> > +    unsigned int i;
> > +
>
> I would move allocation outside of initializer and use preferred xvzalloc()
> variant:
>
>     task = xvzalloc(struct vpci_map_task);
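
If xvzalloc() is adopted here, then presumably the xfree() in
destroy_map_task() would need to switch to the matching free variant as well:

    xvfree(task);    /* pairs with xvzalloc() */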
>
> > +    if ( !task )
> > +        return NULL;
> > +
> > +    BUILD_BUG_ON(ARRAY_SIZE(task->bars) !=
> > +                 ARRAY_SIZE(pdev->vpci->header.bars));
> > +
> > +    for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
> > +    {
> > +        if ( pdev->vpci->header.bars[i].type >= VPCI_BAR_MEM32 )
>
> I would reduce one level of indentation by
>
>     if ( pdev->vpci->header.bars[i].type < VPCI_BAR_MEM32 )
>         continue;
>
> > +        {
> > +            char str[32];
> > +
> > +            snprintf(str, sizeof(str), "%pp:BAR%u", &pdev->sbdf, i);
> > +
> > +            task->bars[i].mem = rangeset_new(pdev->domain, str,
> > +                                             RANGESETF_no_print);
> > +
> > +            if ( !task->bars[i].mem )
> > +            {
> > +                destroy_map_task(task);
>
> What if allocation fails in the middle of the loop, e.g. i == 5?
> I think previously allocated rangesets in this loop should be freed.
> Oh, I see that is done in destroy_map_task(); please disregard.
>
> > +                return NULL;
> > +            }
> > +
> > +            task->bars[i].addr = pdev->vpci->header.bars[i].addr;
> > +            task->bars[i].guest_addr = pdev->vpci->header.bars[i].guest_addr;
> > +        }
> > +    }
> > +
> > +    task->pdev = pdev;
> > +    task->domain = pdev->domain;
> > +    task->cmd = cmd;
> > +    task->rom_only = rom_only;
> > +
> > +    return task;
> > +}
> > +
> > +static void defer_map(struct vpci_map_task *task)
> >  {
> >      struct vcpu *curr = current;
> >
> > @@ -293,9 +348,9 @@ static void defer_map(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
> >       * is mapped. This can lead to parallel mapping operations being
> >       * started for the same device if the domain is not well-behaved.
> >       */
> > -    curr->vpci.pdev = pdev;
> > -    curr->vpci.cmd = cmd;
> > -    curr->vpci.rom_only = rom_only;
> > +
> > +    list_add_tail(&task->next, &curr->vpci.task_queue);
> > +
> >      /*
> >       * Raise a scheduler softirq in order to prevent the guest from resuming
> >       * execution with pending mapping operations, to trigger the invocation
> > @@ -310,11 +365,15 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
> >      struct pci_dev *tmp;
> >      const struct domain *d;
> >      const struct vpci_msix *msix = pdev->vpci->msix;
> > +    struct vpci_map_task *task = alloc_map_task(pdev, cmd, rom_only);
>
> Same comment re: having allocation on a separate line.
>
> >      unsigned int i, j;
> >      int rc;
> >
> >      ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
> >
> > +    if ( !task )
> > +        return -ENOMEM;
> > +
> >      /*
> >       * Create a rangeset per BAR that represents the current device memory
> >       * region and compare it against all the currently active BAR memory
> > @@ -330,12 +389,13 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
> >      for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> >      {
> >          struct vpci_bar *bar = &header->bars[i];
> > +        struct rangeset *mem = task->bars[i].mem;
> >          unsigned long start = PFN_DOWN(bar->addr);
> >          unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
> >          unsigned long start_guest = PFN_DOWN(bar->guest_addr);
> >          unsigned long end_guest = PFN_DOWN(bar->guest_addr + bar->size - 1);
> >
> > -        if ( !bar->mem )
> > +        if ( !mem )
> >              continue;
> >
> >          if ( !MAPPABLE_BAR(bar) ||
> > @@ -353,7 +413,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
> >              continue;
> >          }
> >
> > -        ASSERT(rangeset_is_empty(bar->mem));
> > +        ASSERT(rangeset_is_empty(mem));
> >
> >          /*
> >           * Make sure that the guest set address has the same page offset
> > @@ -365,21 +425,23 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
> >              gprintk(XENLOG_G_WARNING,
> >                      "%pp: can't map BAR%u - offset mismatch: %#lx vs %#lx\n",
> >                      &pdev->sbdf, i, bar->guest_addr, bar->addr);
> > +            destroy_map_task(task);
>
> I think using goto would be justified for doing cleanup in one place.
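
For illustration, with a single label at the end of modify_bars() the error
paths might collapse to something like (abridged, untested):

            rc = -EINVAL;
            goto fail;
        }

        rc = rangeset_add_range(mem, start_guest, end_guest);
        if ( rc )
        {
            printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
                   start_guest, end_guest, rc);
            goto fail;
        }

        ...

     fail:
        destroy_map_task(task);
        return rc;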
>
> >              return -EINVAL;
> >          }
> >
> > -        rc = rangeset_add_range(bar->mem, start_guest, end_guest);
> > +        rc = rangeset_add_range(mem, start_guest, end_guest);
> >          if ( rc )
> >          {
> >              printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
> >                     start_guest, end_guest, rc);
> > +            destroy_map_task(task);
> >              return rc;
> >          }
> >
> >          /* Check for overlap with the already setup BAR ranges. */
> >          for ( j = 0; j < i; j++ )
> >          {
> > -            struct vpci_bar *prev_bar = &header->bars[j];
> > +            struct vpci_bar_map *prev_bar = &task->bars[j];
> >
> >              if ( rangeset_is_empty(prev_bar->mem) )
> >                  continue;
> > @@ -390,16 +452,18 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
> >                  gprintk(XENLOG_WARNING,
> >                          "%pp: failed to remove overlapping range [%lx, %lx]: %d\n",
> >                          &pdev->sbdf, start_guest, end_guest, rc);
> > +                destroy_map_task(task);
> >                  return rc;
> >              }
> >          }
> >
> > -        rc = pci_sanitize_bar_memory(bar->mem);
> > +        rc = pci_sanitize_bar_memory(mem);
> >          if ( rc )
> >          {
> >              gprintk(XENLOG_WARNING,
> >                      "%pp: failed to sanitize BAR#%u memory: %d\n",
> >                      &pdev->sbdf, i, rc);
> > +            destroy_map_task(task);
> >              return rc;
> >          }
> >      }
> > @@ -413,7 +477,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
> >
> >          for ( j = 0; j < ARRAY_SIZE(header->bars); j++ )
> >          {
> > -            const struct vpci_bar *bar = &header->bars[j];
> > +            const struct vpci_bar_map *bar = &task->bars[j];
> >
> >              if ( rangeset_is_empty(bar->mem) )
> >                  continue;
> > @@ -424,6 +488,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
> >                  gprintk(XENLOG_WARNING,
> >                          "%pp: failed to remove MSIX table [%lx, %lx]: %d\n",
> >                          &pdev->sbdf, start, end, rc);
> > +                destroy_map_task(task);
> >                  return rc;
> >              }
> >          }
> > @@ -468,8 +533,9 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
> >              for ( j = 0; j < ARRAY_SIZE(header->bars); j++)
> >              {
> >                  const struct vpci_bar *bar = &header->bars[j];
> > +                struct rangeset *mem = task->bars[j].mem;
> >
> > -                if ( !rangeset_overlaps_range(bar->mem, start, end) ||
> > +                if ( !rangeset_overlaps_range(mem, start, end) ||
> >                       /*
> >                        * If only the ROM enable bit is toggled check against
> >                        * other BARs in the same device for overlaps, but not
> > @@ -480,12 +546,13 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
> >                       bar->type == VPCI_BAR_ROM) )
> >                      continue;
> >
> > -                rc = rangeset_remove_range(bar->mem, start, end);
> > +                rc = rangeset_remove_range(mem, start, end);
> >                  if ( rc )
> >                  {
> >                      gprintk(XENLOG_WARNING,
> >                              "%pp: failed to remove [%lx, %lx]: %d\n",
> >                              &pdev->sbdf, start, end, rc);
> > +                    destroy_map_task(task);
> >                      return rc;
> >                  }
> >              }
> > @@ -509,10 +576,12 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
> >       * will always be to establish mappings and process all the BARs.
> >       */
> >      ASSERT((cmd & PCI_COMMAND_MEMORY) && !rom_only);
> > -    return apply_map(pdev->domain, pdev, cmd);
> > +    rc = apply_map(task);
> > +    destroy_map_task(task);
> > +    return rc;
> >  }
> >
> > -    defer_map(pdev, cmd, rom_only);
> > +    defer_map(task);
> >
> >      return 0;
> >  }
> > @@ -731,18 +800,6 @@ static void cf_check rom_write(
> >      }
> >  }
> >
> > -static int bar_add_rangeset(const struct pci_dev *pdev, struct vpci_bar *bar,
> > -                            unsigned int i)
> > -{
> > -    char str[32];
> > -
> > -    snprintf(str, sizeof(str), "%pp:BAR%u", &pdev->sbdf, i);
> > -
> > -    bar->mem = rangeset_new(pdev->domain, str, RANGESETF_no_print);
> > -
> > -    return !bar->mem ? -ENOMEM : 0;
> > -}
> > -
> >  static int vpci_init_capability_list(struct pci_dev *pdev)
> >  {
> >      int rc;
> > @@ -947,10 +1004,6 @@ static int cf_check init_header(struct pci_dev *pdev)
> >          else
> >              bars[i].type = VPCI_BAR_MEM32;
> >
> > -        rc = bar_add_rangeset(pdev, &bars[i], i);
> > -        if ( rc )
> > -            goto fail;
> > -
> >          rc = pci_size_mem_bar(pdev->sbdf, reg, &addr, &size,
> >                                (i == num_bars - 1) ? PCI_BAR_LAST : 0);
> >          if ( rc < 0 )
> > @@ -1003,12 +1056,6 @@ static int cf_check init_header(struct pci_dev *pdev)
> >                                4, rom);
> >          if ( rc )
> >              rom->type = VPCI_BAR_EMPTY;
> > -        else
> > -        {
> > -            rc = bar_add_rangeset(pdev, rom, num_bars);
> > -            if ( rc )
> > -                goto fail;
> > -        }
> >      }
> >      else if ( !is_hwdom )
> >      {
> > diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
> > index 09988f04c27c..7177cfce355d 100644
> > --- a/xen/drivers/vpci/vpci.c
> > +++ b/xen/drivers/vpci/vpci.c
> > @@ -117,9 +117,6 @@ void vpci_deassign_device(struct pci_dev *pdev)
> >              iounmap(pdev->vpci->msix->table[i]);
> >      }
> >
> > -    for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ )
> > -        rangeset_destroy(pdev->vpci->header.bars[i].mem);
> > -
> >      xfree(pdev->vpci->msix);
> >      xfree(pdev->vpci->msi);
> >      xfree(pdev->vpci);
> > diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
> > index 6a481a4e89d3..c2e75076691f 100644
> > --- a/xen/include/xen/vpci.h
> > +++ b/xen/include/xen/vpci.h
> > @@ -103,7 +103,6 @@ struct vpci {
> >          uint64_t guest_addr;
> >          uint64_t size;
> >          uint64_t resizable_sizes;
> > -        struct rangeset *mem;
> >          enum {
> >              VPCI_BAR_EMPTY,
> >              VPCI_BAR_IO,
> > @@ -194,14 +193,25 @@ struct vpci {
> >  #endif
> >  };
> >
> > -struct vpci_vcpu {
> > +#ifdef __XEN__
> > +struct vpci_map_task {
> >      /* Per-vcpu structure to store state while {un}mapping of PCI BARs. */
> > +    struct list_head next;
> >      const struct pci_dev *pdev;
> > +    struct domain *domain;
> > +    struct vpci_bar_map {
> > +        uint64_t addr;
> > +        uint64_t guest_addr;
>
> Perhaps use hpa/gpa notation for naming address fields?
>
> > +        struct rangeset *mem;
> > +    } bars[PCI_HEADER_NORMAL_NR_BARS + 1];
>
> I would add a brief comment for `+ 1`
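
For illustration, combining both of those suggestions (the hpa/gpa names are
just one possible spelling):

    struct vpci_bar_map {
        uint64_t hpa;           /* host physical address */
        uint64_t gpa;           /* guest physical address */
        struct rangeset *mem;
    } bars[PCI_HEADER_NORMAL_NR_BARS + 1];  /* + 1 for the expansion ROM BAR */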
>
> >      uint16_t cmd;
> >      bool rom_only : 1;
> >  };
> >
> > -#ifdef __XEN__
> > +struct vpci_vcpu {
> > +    struct list_head task_queue;
> > +};
> > +
> >  void vpci_dump_msi(void);
> >
> >  /* Make sure there's a hole in the p2m for the MSIX mmio areas. */
> > --
> > 2.50.1
> >
> >