On Thu, 10 Nov 2011, Konrad Rzeszutek Wilk wrote:
> On Fri, Oct 28, 2011 at 04:07:33PM +0100, Anthony PERARD wrote:
> > From: Allen Kay <allen.m.kay@xxxxxxxxx>
> >
>
> This is going to be a bit lame review..
>
[...]
> > + return;
> > + }
> > +
> > + /* find register group entry */
> > + reg_grp_entry = pt_find_reg_grp(s, address);
> > + if (reg_grp_entry) {
> > + /* check 0 Hardwired register group */
> > + if (reg_grp_entry->reg_grp->grp_type == GRP_TYPE_HARDWIRED) {
> > + /* ignore silently */
> > + PT_LOG("Warning: Access to 0 Hardwired register. "
> > + "[%02x:%02x.%x][Offset:%02xh][Length:%d]\n",
> > + pci_bus_num(d->bus), PCI_SLOT(d->devfn),
> > PCI_FUNC(d->devfn),
> > + address, len);
> > + return;
> > + }
> > + }
> > +
> > + /* read I/O device register value */
> > + rc = host_pci_get_block(s->real_device, address,
> > + (uint8_t *)&read_val, len);
> > + if (!rc) {
> > + PT_LOG("Error: pci_read_block failed. return value[%d].\n", rc);
>
> There isn't a PT_ERR? Hm, looking at the code there is only PT_LOG. Perhaps
> declearing PT_ERR and PT_WARN might be a good idea? In case in the future
> one wants different levels of this? Or do we really not care much about that?
I will add this two macros.
> > + memset(&read_val, 0xff, len);
> > + }
> > +
> > + /* pass directly to libpci for passthrough type register group */
>
> Um, is the libpci requirement a certain thing?
:(, it's just an old comment. libpci is not used anymore and have been
replaced by host-pci-device. I will replace libpci in the comment by
"the real device".
[...]
> > + switch (reg->size) {
> > + case 1:
> > + if (reg->u.b.write) {
> > + rc = reg->u.b.write(s, reg_entry, ptr_val,
> > + read_val >> ((real_offset & 3) <<
> > 3),
> > + valid_mask);
> > + }
> > + break;
> > + case 2:
> > + if (reg->u.w.write) {
> > + rc = reg->u.w.write(s, reg_entry, (uint16_t *)ptr_val,
> > + (read_val >> ((real_offset & 3) <<
> > 3)),
> > + valid_mask);
> > + }
> > + break;
> > + case 4:
> > + if (reg->u.dw.write) {
> > + rc = reg->u.dw.write(s, reg_entry, (uint32_t *)ptr_val,
> > + (read_val >> ((real_offset & 3)
> > << 3)),
> > + valid_mask);
> > + }
> > + break;
> > + }
> > +
> > + if (rc < 0) {
> > + hw_error("Internal error: Invalid write emulation "
> > + "return value[%d]. I/O emulator exit.\n", rc);
>
> Oh. I hadn't realized this, but you are using hw_error. Which is
> calling 'abort'! Yikes. Is there no way to recover from this? Say return
> 0xfffff?
In qemu-xen-traditionnal, it was an exit(1). I do not know the
consequence of a bad write, and I can not return anythings. So I suppose
that the guest would know that somethings wrong only on the next read.
Instead of abort();, I can just do nothing and return. Or we could unplug
the device from QEMU.
Any preference?
> > + }
> > +
> > + /* calculate next address to find */
> > + emul_len -= reg->size;
> > + if (emul_len > 0) {
> > + find_addr = real_offset + reg->size;
> > + }
> > + } else {
> > + /* nothing to do with passthrough type register,
> > + * continue to find next byte */
> > + emul_len--;
> > + find_addr++;
> > + }
> > + }
> > +
> > + /* need to shift back before passing them to libpci */
> > + val >>= (address & 3) << 3;
> > +
> > +out:
> > + if (!(reg && reg->no_wb)) {
> > + /* unknown regs are passed through */
> > + rc = host_pci_set_block(s->real_device, address, (uint8_t *)&val,
> > len);
> > +
> > + if (!rc) {
> > + PT_LOG("Error: pci_write_block failed. return value[%d].\n",
> > rc);
> > + }
> > + }
> > +
> > + if (s->pm_state != NULL && s->pm_state->flags & PT_FLAG_TRANSITING) {
> > + qemu_mod_timer(s->pm_state->pm_timer,
> > + qemu_get_clock_ms(rt_clock) +
> > s->pm_state->pm_delay);
> > + }
> > +}
> > +
> > +/* ioport/iomem space*/
> > +static void pt_iomem_map(XenPCIPassthroughState *s, int i,
> > + pcibus_t e_phys, pcibus_t e_size, int type)
> > +{
> > + uint32_t old_ebase = s->bases[i].e_physbase;
> > + bool first_map = s->bases[i].e_size == 0;
> > + int ret = 0;
> > +
> > + s->bases[i].e_physbase = e_phys;
> > + s->bases[i].e_size = e_size;
> > +
> > + PT_LOG("e_phys=%#"PRIx64" maddr=%#"PRIx64" type=%%d"
> > + " len=%#"PRIx64" index=%d first_map=%d\n",
> > + e_phys, s->bases[i].access.maddr, /*type,*/
> > + e_size, i, first_map);
> > +
> > + if (e_size == 0) {
> > + return;
> > + }
> > +
> > + if (!first_map && old_ebase != -1) {
>
> old_ebase != PCI_BAR_UNMAPPED ?
:(, no. Because old_ebase is a uint32_t and PCI_BAR_UNMAPPED is
pcibus_t (uint64_t in Xen case).
I'm not sure that a good idee to change the type of old_ebase as
xc_domain_memory_mapping bellow takes only uint32_t.
But, if I can replace a -1 by PCI_BAR_UNMAPPED, I will.
> > + /* Remove old mapping */
> > + ret = xc_domain_memory_mapping(xen_xc, xen_domid,
> > + old_ebase >> XC_PAGE_SHIFT,
> > + s->bases[i].access.maddr >> XC_PAGE_SHIFT,
> > + (e_size + XC_PAGE_SIZE - 1) >>
> > XC_PAGE_SHIFT,
> > + DPCI_REMOVE_MAPPING);
> > + if (ret != 0) {
> > + PT_LOG("Error: remove old mapping failed!\n");
> > + return;
> > + }
> > + }
> > +
> > + /* map only valid guest address */
> > + if (e_phys != -1) {
>
> PCI_BAR_UNMAPPED
>
> > + /* Create new mapping */
> > + ret = xc_domain_memory_mapping(xen_xc, xen_domid,
> > + s->bases[i].e_physbase >> XC_PAGE_SHIFT,
> > + s->bases[i].access.maddr >>
> > XC_PAGE_SHIFT,
> > + (e_size+XC_PAGE_SIZE-1) >>
> > XC_PAGE_SHIFT,
> > + DPCI_ADD_MAPPING);
> > +
> > + if (ret != 0) {
> > + PT_LOG("Error: create new mapping failed!\n");
> > + }
> > + }
> > +}
[...]
> > +void pt_bar_mapping(XenPCIPassthroughState *s, int io_enable, int
> > mem_enable)
> > +{
> > + int i;
> > +
> > + for (i = 0; i < PCI_NUM_REGIONS; i++) {
> > + pt_bar_mapping_one(s, i, io_enable, mem_enable);
> > + }
> > +}
> > +
> > +/* register regions */
> > +static int pt_register_regions(XenPCIPassthroughState *s)
> > +{
> > + int i = 0;
> > + uint32_t bar_data = 0;
> > + HostPCIDevice *d = s->real_device;
> > +
> > + /* Register PIO/MMIO BARs */
> > + for (i = 0; i < PCI_BAR_ENTRIES; i++) {
> > + HostPCIIORegion *r = &d->io_regions[i];
> > +
> > + if (r->base_addr) {
>
> So should you check for PCI_BAR_UNMAPPED or is that not really
> required here as the pci_register_bar would do it?
Actually, this value come from the real device (the value in
sysfs/resource). So, I think it's just 0 if it's not mapped.
Here, it's probably better to check for the size instead, to know if
there is actually a BAR.
> > + s->bases[i].e_physbase = r->base_addr;
> > + s->bases[i].access.u = r->base_addr;
> > +
> > + /* Register current region */
> > + if (r->flags & IORESOURCE_IO) {
> > + memory_region_init_io(&s->bar[i], NULL, NULL,
> > + "xen-pci-pt-bar", r->size);
>
> You can make the "xen_pci-pt-bar" be a #define somewhere and reuse that.
>
> > + pci_register_bar(&s->dev, i, PCI_BASE_ADDRESS_SPACE_IO,
> > + &s->bar[i]);
> > + } else if (r->flags & IORESOURCE_PREFETCH) {
> > + memory_region_init_io(&s->bar[i], NULL, NULL,
> > + "xen-pci-pt-bar", r->size);
> > + pci_register_bar(&s->dev, i, PCI_BASE_ADDRESS_MEM_PREFETCH,
> > + &s->bar[i]);
> > + } else {
> > + memory_region_init_io(&s->bar[i], NULL, NULL,
> > + "xen-pci-pt-bar", r->size);
> > + pci_register_bar(&s->dev, i, PCI_BASE_ADDRESS_SPACE_MEMORY,
> > + &s->bar[i]);
> > + }
> > +
> > + PT_LOG("IO region registered (size=0x%08"PRIx64
> > + " base_addr=0x%08"PRIx64")\n",
> > + r->size, r->base_addr);
> > + }
> > + }
> > +
> > + /* Register expansion ROM address */
> > + if (d->rom.base_addr && d->rom.size) {
> > + /* Re-set BAR reported by OS, otherwise ROM can't be read. */
> > + bar_data = host_pci_get_long(d, PCI_ROM_ADDRESS);
> > + if ((bar_data & PCI_ROM_ADDRESS_MASK) == 0) {
> > + bar_data |= d->rom.base_addr & PCI_ROM_ADDRESS_MASK;
> > + host_pci_set_long(d, PCI_ROM_ADDRESS, bar_data);
> > + }
> > +
> > + s->bases[PCI_ROM_SLOT].e_physbase = d->rom.base_addr;
> > + s->bases[PCI_ROM_SLOT].access.maddr = d->rom.base_addr;
> > +
> > + memory_region_init_rom_device(&s->rom, NULL, NULL, &s->dev.qdev,
> > + "xen-pci-pt-rom", d->rom.size);
> > + pci_register_bar(&s->dev, PCI_ROM_SLOT,
> > PCI_BASE_ADDRESS_MEM_PREFETCH,
> > + &s->rom);
> > +
> > + PT_LOG("Expansion ROM registered (size=0x%08"PRIx64
> > + " base_addr=0x%08"PRIx64")\n",
> > + d->rom.size, d->rom.base_addr);
> > + }
> > +
> > + return 0;
> > +}
> > +
[...]
> > +static int pt_initfn(PCIDevice *pcidev)
> > +{
> > + XenPCIPassthroughState *s = DO_UPCAST(XenPCIPassthroughState, dev,
> > pcidev);
> > + int dom, bus;
> > + unsigned slot, func;
> > + int rc = 0;
> > + uint32_t machine_irq;
> > + int pirq = -1;
> > +
> > + if (pci_parse_devaddr(s->hostaddr, &dom, &bus, &slot, &func) < 0) {
> > + fprintf(stderr, "error parse bdf: %s\n", s->hostaddr);
> > + return -1;
> > + }
> > +
> > + /* register real device */
> > + PT_LOG("Assigning real physical device %02x:%02x.%x to devfn %i ...\n",
> > + bus, slot, func, s->dev.devfn);
> > +
> > + s->real_device = host_pci_device_get(bus, slot, func);
> > + if (!s->real_device) {
> > + return -1;
> > + }
> > +
> > + s->is_virtfn = s->real_device->is_virtfn;
> > + if (s->is_virtfn) {
> > + PT_LOG("%04x:%02x:%02x.%x is a SR-IOV Virtual Function\n",
> > + s->real_device->domain, bus, slot, func);
> > + }
> > +
> > + /* Initialize virtualized PCI configuration (Extended 256 Bytes) */
> > + if (host_pci_get_block(s->real_device, 0, pcidev->config,
> > + PCI_CONFIG_SPACE_SIZE) == -1) {
> > + return -1;
> > + }
> > +
> > + /* Handle real device's MMIO/PIO BARs */
> > + pt_register_regions(s);
> > +
> > + /* reinitialize each config register to be emulated */
> > + pt_config_init(s);
> > +
> > + /* Bind interrupt */
> > + if (!s->dev.config[PCI_INTERRUPT_PIN]) {
> > + PT_LOG("no pin interrupt\n");
>
> Perhaps include some details of which device failed?
There is already detailed about the device at the beginning of the
function. Is it not enough?
> > + goto out;
> > + }
> > +
> > + machine_irq = host_pci_get_byte(s->real_device, PCI_INTERRUPT_LINE);
> > + rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
> > +
> > + if (rc) {
> > + PT_LOG("Error: Mapping irq failed, rc = %d\n", rc);
>
> Can you also include the IRQ it tried to map (both machine and pirq).
Yep.
> > +
> > + /* Disable PCI intx assertion (turn on bit10 of devctl) */
> > + host_pci_set_word(s->real_device,
> > + PCI_COMMAND,
> > + pci_get_word(s->dev.config + PCI_COMMAND)
> > + | PCI_COMMAND_INTX_DISABLE);
> > + machine_irq = 0;
> > + s->machine_irq = 0;
> > + } else {
> > + machine_irq = pirq;
> > + s->machine_irq = pirq;
> > + mapped_machine_irq[machine_irq]++;
> > + }
> > +
> > + /* bind machine_irq to device */
> > + if (rc < 0 && machine_irq != 0) {
> > + uint8_t e_device = PCI_SLOT(s->dev.devfn);
> > + uint8_t e_intx = pci_intx(s);
> > +
> > + rc = xc_domain_bind_pt_pci_irq(xen_xc, xen_domid, machine_irq, 0,
> > + e_device, e_intx);
> > + if (rc < 0) {
> > + PT_LOG("Error: Binding of interrupt failed! rc=%d\n", rc);
>
> A bit details - name of the device, the IRQ,..
>
> > +
> > + /* Disable PCI intx assertion (turn on bit10 of devctl) */
> > + host_pci_set_word(s->real_device, PCI_COMMAND,
> > + *(uint16_t *)(&s->dev.config[PCI_COMMAND])
> > + | PCI_COMMAND_INTX_DISABLE);
> > + mapped_machine_irq[machine_irq]--;
> > +
> > + if (mapped_machine_irq[machine_irq] == 0) {
> > + if (xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq))
> > {
> > + PT_LOG("Error: Unmapping of interrupt failed! rc=%d\n",
> > + rc);
>
> And here too. It would be beneficial to have on the error paths lots of
> nice details so that in the field it will be easier to find out what
> went wrong (and match up PIRQ with the GSI).
Yes, I will try to improve the messages.
It's also probably good to always print the errors.
Thanks,
--
Anthony PERARD
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|