[XFS] Only use refcounted pages for I/O
author Christoph Hellwig <hch@infradead.org>
Mon, 14 May 2007 08:23:50 +0000 (18:23 +1000)
committer Tim Shimmin <tes@chook.melbourne.sgi.com>
Sat, 14 Jul 2007 05:21:14 +0000 (15:21 +1000)
Many block drivers (aoe, iscsi) really want refcountable pages in bios,
which is what almost everyone sends down. XFS unfortunately has a few
places where it sends down buffers that may come from kmalloc, which
breaks them.

Fix the places that use kmalloc()d buffers.

SGI-PV: 964546
SGI-Modid: xfs-linux-melb:xfs-kern:28562a

Signed-Off-By: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
fs/xfs/linux-2.6/xfs_buf.c
fs/xfs/linux-2.6/xfs_buf.h
fs/xfs/xfs_log.c

index fe4f66a5af145a31a61afac29b440c4ce1f2d5d6..208daf58b826765ca3f097c06a4dfc208e65701a 100644 (file)
@@ -314,7 +314,7 @@ xfs_buf_free(
 
        ASSERT(list_empty(&bp->b_hash_list));
 
-       if (bp->b_flags & _XBF_PAGE_CACHE) {
+       if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
                uint            i;
 
                if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
@@ -323,18 +323,11 @@ xfs_buf_free(
                for (i = 0; i < bp->b_page_count; i++) {
                        struct page     *page = bp->b_pages[i];
 
-                       ASSERT(!PagePrivate(page));
+                       if (bp->b_flags & _XBF_PAGE_CACHE)
+                               ASSERT(!PagePrivate(page));
                        page_cache_release(page);
                }
                _xfs_buf_free_pages(bp);
-       } else if (bp->b_flags & _XBF_KMEM_ALLOC) {
-                /*
-                 * XXX(hch): bp->b_count_desired might be incorrect (see
-                 * xfs_buf_associate_memory for details), but fortunately
-                 * the Linux version of kmem_free ignores the len argument..
-                 */
-               kmem_free(bp->b_addr, bp->b_count_desired);
-               _xfs_buf_free_pages(bp);
        }
 
        xfs_buf_deallocate(bp);
@@ -764,41 +757,41 @@ xfs_buf_get_noaddr(
        size_t                  len,
        xfs_buftarg_t           *target)
 {
-       size_t                  malloc_len = len;
+       unsigned long           page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
+       int                     error, i;
        xfs_buf_t               *bp;
-       void                    *data;
-       int                     error;
 
        bp = xfs_buf_allocate(0);
        if (unlikely(bp == NULL))
                goto fail;
        _xfs_buf_initialize(bp, target, 0, len, 0);
 
- try_again:
-       data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL | KM_LARGE);
-       if (unlikely(data == NULL))
+       error = _xfs_buf_get_pages(bp, page_count, 0);
+       if (error)
                goto fail_free_buf;
 
-       /* check whether alignment matches.. */
-       if ((__psunsigned_t)data !=
-           ((__psunsigned_t)data & ~target->bt_smask)) {
-               /* .. else double the size and try again */
-               kmem_free(data, malloc_len);
-               malloc_len <<= 1;
-               goto try_again;
+       for (i = 0; i < page_count; i++) {
+               bp->b_pages[i] = alloc_page(GFP_KERNEL);
+               if (!bp->b_pages[i])
+                       goto fail_free_mem;
        }
+       bp->b_flags |= _XBF_PAGES;
 
-       error = xfs_buf_associate_memory(bp, data, len);
-       if (error)
+       error = _xfs_buf_map_pages(bp, XBF_MAPPED);
+       if (unlikely(error)) {
+               printk(KERN_WARNING "%s: failed to map pages\n",
+                               __FUNCTION__);
                goto fail_free_mem;
-       bp->b_flags |= _XBF_KMEM_ALLOC;
+       }
 
        xfs_buf_unlock(bp);
 
-       XB_TRACE(bp, "no_daddr", data);
+       XB_TRACE(bp, "no_daddr", len);
        return bp;
+
  fail_free_mem:
-       kmem_free(data, malloc_len);
+       while (--i >= 0)
+               __free_page(bp->b_pages[i]);
  fail_free_buf:
        xfs_buf_free(bp);
  fail:
index b6241f6201a5e89803bc5e63dad2e336850765a2..b5908a34b15d80916ac7ce307a76f02e319bcf73 100644 (file)
@@ -63,7 +63,7 @@ typedef enum {
 
        /* flags used only internally */
        _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache                 */
-       _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()              */
+       _XBF_PAGES = (1 << 18),     /* backed by refcounted pages          */
        _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue         */
        _XBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue             */
 } xfs_buf_flags_t;
index c48bf61f17bd847cdb04a6ca8ad6b6f9f8c7e273..635f99e6302ff642f940bd0d448cf6a06760c6f3 100644 (file)
@@ -1199,11 +1199,18 @@ xlog_alloc_log(xfs_mount_t      *mp,
                *iclogp = (xlog_in_core_t *)
                          kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
                iclog = *iclogp;
-               iclog->hic_data = (xlog_in_core_2_t *)
-                         kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE);
-
                iclog->ic_prev = prev_iclog;
                prev_iclog = iclog;
+
+               bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
+               if (!XFS_BUF_CPSEMA(bp))
+                       ASSERT(0);
+               XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
+               XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
+               XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
+               iclog->ic_bp = bp;
+               iclog->hic_data = bp->b_addr;
+
                log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
 
                head = &iclog->ic_header;
@@ -1216,11 +1223,6 @@ xlog_alloc_log(xfs_mount_t       *mp,
                INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT);
                memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
 
-               bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
-               XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
-               XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
-               XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
-               iclog->ic_bp = bp;
 
                iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
                iclog->ic_state = XLOG_STATE_ACTIVE;
@@ -1528,7 +1530,6 @@ xlog_dealloc_log(xlog_t *log)
                }
 #endif
                next_iclog = iclog->ic_next;
-               kmem_free(iclog->hic_data, log->l_iclog_size);
                kmem_free(iclog, sizeof(xlog_in_core_t));
                iclog = next_iclog;
        }