# HG changeset patch # User yamahata@xxxxxxxxxxxxx # Date 1184295210 -32400 # Node ID a3f2f5e99df1a90fa21f24956dddcb8b6c341b05 # Parent 670f8d5305d25cfd79da42eb89d6f50ac745281c [Linux, blktap] fix page reference count/file rss count leak fix page reference count/file rss count leak when auto translated mode is enabled. The tapdisk process rss size becomes far too large with auto translation enabled. An example follows, taken on a system where dom0 has only several hundred megabytes of memory. This patch fixes it. > PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND > 6758 root 15 0 39824 1.7t 1.7t S 0 188932.0 14:10.28 tapdisk This is because the page reference count and file rss size are incremented when an I/O request is accepted, but aren't decremented when the request is done. This can be fixed by using vm_insert_page() in blktap_mmap() instead of remap_pfn_range(). Details: The tapdisk daemon mmaps the blktap device, and the blktap driver maps pages from the front end into the mmapped area and unmaps them when the I/O request is done. When an I/O request is accepted, dispatch_rw_block_io() is called. With auto translated mode disabled, it directly manipulates the page table without incrementing the rss size. With auto translated mode enabled, it calls vm_insert_page(), which increments the page reference count and file rss. When an I/O request is done, fast_flush_area() is called. With auto translated mode disabled, it directly manipulates the page table without decrementing the rss size. With auto translated mode enabled, it calls zap_page_range(), which should decrement the page reference count and file rss. However, because (vma->vm_flags & VM_PFNMAP) is true, it doesn't decrement them, so the page reference count and file rss are leaked. The blktap driver allocates pages and never frees them, so the page reference count leak doesn't cause an issue (probably until overflow). Without auto translation, it makes sense for blktap_mmap() to set VM_PFNMAP with remap_pfn_range() because the blktap driver directly manipulates page tables. 
On the other hand, with auto translation the VM_PFNMAP bit shouldn't be set. This can be achieved by using vm_insert_page() in blktap_mmap() instead of remap_pfn_range(). Signed-off-by: Isaku Yamahata diff -r 670f8d5305d2 -r a3f2f5e99df1 drivers/xen/blktap/blktap.c --- a/drivers/xen/blktap/blktap.c Thu Jul 12 16:05:09 2007 +0100 +++ b/drivers/xen/blktap/blktap.c Fri Jul 13 11:53:30 2007 +0900 @@ -653,6 +653,7 @@ static int blktap_mmap(struct file *filp struct page **map; int i; tap_blkif_t *info = filp->private_data; + int ret; if (info == NULL) { WPRINTK("blktap: mmap, retrieving idx failed\n"); @@ -674,9 +675,14 @@ static int blktap_mmap(struct file *filp info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT); /* Map the ring pages to the start of the region and reserve it. */ - if (remap_pfn_range(vma, vma->vm_start, - __pa(info->ufe_ring.sring) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) { + if (xen_feature(XENFEAT_auto_translated_physmap)) + ret = vm_insert_page(vma, vma->vm_start, + virt_to_page(info->ufe_ring.sring)); + else + ret = remap_pfn_range(vma, vma->vm_start, + __pa(info->ufe_ring.sring) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot); + if (ret) { WPRINTK("Mapping user ring failed!\n"); goto fail; }