[XFS] simplify xfs_create/mknod/symlink prototype
[linux-2.6-block.git] / fs / xfs / xfs_itable.c
index 46249e4d1feaba62eec485e23376c2d3508f7440..1edd9afb664b062ab2346a490e4e5f21c96454d4 100644 (file)
 #include "xfs_error.h"
 #include "xfs_btree.h"
 
+int
+xfs_internal_inum(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino)
+{
+       return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
+               (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
+                (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
+}
+
 STATIC int
 xfs_bulkstat_one_iget(
        xfs_mount_t     *mp,            /* mount point for filesystem */
@@ -47,12 +57,13 @@ xfs_bulkstat_one_iget(
        xfs_bstat_t     *buf,           /* return buffer */
        int             *stat)          /* BULKSTAT_RV_... */
 {
-       xfs_dinode_core_t *dic;         /* dinode core info pointer */
+       xfs_icdinode_t  *dic;   /* dinode core info pointer */
        xfs_inode_t     *ip;            /* incore inode pointer */
        bhv_vnode_t     *vp;
        int             error;
 
-       error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno);
+       error = xfs_iget(mp, NULL, ino,
+                        XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno);
        if (error) {
                *stat = BULKSTAT_RV_NOTHING;
                return error;
@@ -140,37 +151,37 @@ xfs_bulkstat_one_dinode(
         * the new format. We don't change the version number so that we
         * can distinguish this from a real new format inode.
         */
-       if (INT_GET(dic->di_version, ARCH_CONVERT) == XFS_DINODE_VERSION_1) {
-               buf->bs_nlink = INT_GET(dic->di_onlink, ARCH_CONVERT);
+       if (dic->di_version == XFS_DINODE_VERSION_1) {
+               buf->bs_nlink = be16_to_cpu(dic->di_onlink);
                buf->bs_projid = 0;
        } else {
-               buf->bs_nlink = INT_GET(dic->di_nlink, ARCH_CONVERT);
-               buf->bs_projid = INT_GET(dic->di_projid, ARCH_CONVERT);
+               buf->bs_nlink = be32_to_cpu(dic->di_nlink);
+               buf->bs_projid = be16_to_cpu(dic->di_projid);
        }
 
        buf->bs_ino = ino;
-       buf->bs_mode = INT_GET(dic->di_mode, ARCH_CONVERT);
-       buf->bs_uid = INT_GET(dic->di_uid, ARCH_CONVERT);
-       buf->bs_gid = INT_GET(dic->di_gid, ARCH_CONVERT);
-       buf->bs_size = INT_GET(dic->di_size, ARCH_CONVERT);
-       buf->bs_atime.tv_sec = INT_GET(dic->di_atime.t_sec, ARCH_CONVERT);
-       buf->bs_atime.tv_nsec = INT_GET(dic->di_atime.t_nsec, ARCH_CONVERT);
-       buf->bs_mtime.tv_sec = INT_GET(dic->di_mtime.t_sec, ARCH_CONVERT);
-       buf->bs_mtime.tv_nsec = INT_GET(dic->di_mtime.t_nsec, ARCH_CONVERT);
-       buf->bs_ctime.tv_sec = INT_GET(dic->di_ctime.t_sec, ARCH_CONVERT);
-       buf->bs_ctime.tv_nsec = INT_GET(dic->di_ctime.t_nsec, ARCH_CONVERT);
+       buf->bs_mode = be16_to_cpu(dic->di_mode);
+       buf->bs_uid = be32_to_cpu(dic->di_uid);
+       buf->bs_gid = be32_to_cpu(dic->di_gid);
+       buf->bs_size = be64_to_cpu(dic->di_size);
+       buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec);
+       buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec);
+       buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec);
+       buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec);
+       buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec);
+       buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec);
        buf->bs_xflags = xfs_dic2xflags(dic);
-       buf->bs_extsize = INT_GET(dic->di_extsize, ARCH_CONVERT) << mp->m_sb.sb_blocklog;
-       buf->bs_extents = INT_GET(dic->di_nextents, ARCH_CONVERT);
-       buf->bs_gen = INT_GET(dic->di_gen, ARCH_CONVERT);
+       buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog;
+       buf->bs_extents = be32_to_cpu(dic->di_nextents);
+       buf->bs_gen = be32_to_cpu(dic->di_gen);
        memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
-       buf->bs_dmevmask = INT_GET(dic->di_dmevmask, ARCH_CONVERT);
-       buf->bs_dmstate = INT_GET(dic->di_dmstate, ARCH_CONVERT);
-       buf->bs_aextents = INT_GET(dic->di_anextents, ARCH_CONVERT);
+       buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask);
+       buf->bs_dmstate = be16_to_cpu(dic->di_dmstate);
+       buf->bs_aextents = be16_to_cpu(dic->di_anextents);
 
-       switch (INT_GET(dic->di_format, ARCH_CONVERT)) {
+       switch (dic->di_format) {
        case XFS_DINODE_FMT_DEV:
-               buf->bs_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT);
+               buf->bs_rdev = be32_to_cpu(dip->di_u.di_dev);
                buf->bs_blksize = BLKDEV_IOSIZE;
                buf->bs_blocks = 0;
                break;
@@ -184,13 +195,23 @@ xfs_bulkstat_one_dinode(
        case XFS_DINODE_FMT_BTREE:
                buf->bs_rdev = 0;
                buf->bs_blksize = mp->m_sb.sb_blocksize;
-               buf->bs_blocks = INT_GET(dic->di_nblocks, ARCH_CONVERT);
+               buf->bs_blocks = be64_to_cpu(dic->di_nblocks);
                break;
        }
 
        return 0;
 }
 
+STATIC int
+xfs_bulkstat_one_fmt(
+       void                    __user *ubuffer,
+       const xfs_bstat_t       *buffer)
+{
+       if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
+               return -EFAULT;
+       return sizeof(*buffer);
+}
+
 /*
  * Return stat information for one inode.
  * Return 0 if ok, else errno.
@@ -210,19 +231,15 @@ xfs_bulkstat_one(
        xfs_bstat_t     *buf;           /* return buffer */
        int             error = 0;      /* error value */
        xfs_dinode_t    *dip;           /* dinode inode pointer */
+       bulkstat_one_fmt_pf formatter = private_data ? : xfs_bulkstat_one_fmt;
 
        dip = (xfs_dinode_t *)dibuff;
+       *stat = BULKSTAT_RV_NOTHING;
 
-       if (!buffer || ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
-           (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
-            (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))) {
-               *stat = BULKSTAT_RV_NOTHING;
+       if (!buffer || xfs_internal_inum(mp, ino))
                return XFS_ERROR(EINVAL);
-       }
-       if (ubsize < sizeof(*buf)) {
-               *stat = BULKSTAT_RV_NOTHING;
+       if (ubsize < sizeof(*buf))
                return XFS_ERROR(ENOMEM);
-       }
 
        buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
 
@@ -237,21 +254,68 @@ xfs_bulkstat_one(
                xfs_bulkstat_one_dinode(mp, ino, dip, buf);
        }
 
-       if (copy_to_user(buffer, buf, sizeof(*buf)))  {
-               *stat = BULKSTAT_RV_NOTHING;
-               error =  EFAULT;
+       error = formatter(buffer, buf);
+       if (error < 0)  {
+               error = EFAULT;
                goto out_free;
        }
 
        *stat = BULKSTAT_RV_DIDONE;
        if (ubused)
-               *ubused = sizeof(*buf);
+               *ubused = error;
 
  out_free:
        kmem_free(buf, sizeof(*buf));
        return error;
 }
 
+/*
+ * Test to see whether we can use the ondisk inode directly, based
+ * on the given bulkstat flags, filling in dipp accordingly.
+ * Returns zero if the inode is dodgey.
+ */
+STATIC int
+xfs_bulkstat_use_dinode(
+       xfs_mount_t     *mp,
+       int             flags,
+       xfs_buf_t       *bp,
+       int             clustidx,
+       xfs_dinode_t    **dipp)
+{
+       xfs_dinode_t    *dip;
+       unsigned int    aformat;
+
+       *dipp = NULL;
+       if (!bp || (flags & BULKSTAT_FG_IGET))
+               return 1;
+       dip = (xfs_dinode_t *)
+                       xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
+       /*
+        * Check the buffer containing the on-disk inode for di_nlink == 0.
+        * This is to prevent xfs_bulkstat from picking up just reclaimed
+        * inodes that have their in-core state initialized but not flushed
+        * to disk yet. This is a temporary hack that would require a proper
+        * fix in the future.
+        */
+       if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC ||
+           !XFS_DINODE_GOOD_VERSION(dip->di_core.di_version) ||
+           !dip->di_core.di_nlink)
+               return 0;
+       if (flags & BULKSTAT_FG_QUICK) {
+               *dipp = dip;
+               return 1;
+       }
+       /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
+       aformat = dip->di_core.di_aformat;
+       if ((XFS_CFORK_Q(&dip->di_core) == 0) ||
+           (aformat == XFS_DINODE_FMT_LOCAL) ||
+           (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_core.di_anextents)) {
+               *dipp = dip;
+               return 1;
+       }
+       return 1;
+}
+
 /*
  * Return stat information in bulk (by-inode) for the filesystem.
  */
@@ -284,10 +348,11 @@ xfs_bulkstat(
        xfs_agino_t             gino;   /* current btree rec's start inode */
        int                     i;      /* loop index */
        int                     icount; /* count of inodes good in irbuf */
+       size_t                  irbsize; /* size of irec buffer in bytes */
        xfs_ino_t               ino;    /* inode number (filesystem) */
-       xfs_inobt_rec_t         *irbp;  /* current irec buffer pointer */
-       xfs_inobt_rec_t         *irbuf; /* start of irec buffer */
-       xfs_inobt_rec_t         *irbufend; /* end of good irec buffer entries */
+       xfs_inobt_rec_incore_t  *irbp;  /* current irec buffer pointer */
+       xfs_inobt_rec_incore_t  *irbuf; /* start of irec buffer */
+       xfs_inobt_rec_incore_t  *irbufend; /* end of good irec buffer entries */
        xfs_ino_t               lastino=0; /* last inode number returned */
        int                     nbcluster; /* # of blocks in a cluster */
        int                     nicluster; /* # of inodes in a cluster */
@@ -328,13 +393,10 @@ xfs_bulkstat(
                (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
        nimask = ~(nicluster - 1);
        nbcluster = nicluster >> mp->m_sb.sb_inopblog;
-       /*
-        * Allocate a page-sized buffer for inode btree records.
-        * We could try allocating something smaller, but for normal
-        * calls we'll always (potentially) need the whole page.
-        */
-       irbuf = kmem_alloc(NBPC, KM_SLEEP);
-       nirbuf = NBPC / sizeof(*irbuf);
+       irbuf = kmem_zalloc_greedy(&irbsize, NBPC, NBPC * 4,
+                                  KM_SLEEP | KM_MAYFAIL | KM_LARGE);
+       nirbuf = irbsize / sizeof(*irbuf);
+
        /*
         * Loop over the allocation groups, starting from the last
         * inode returned; 0 means start of the allocation group.
@@ -358,7 +420,7 @@ xfs_bulkstat(
                 * Allocate and initialize a btree cursor for ialloc btree.
                 */
                cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO,
-                       (xfs_inode_t *)0, 0);
+                                               (xfs_inode_t *)0, 0);
                irbp = irbuf;
                irbufend = irbuf + nirbuf;
                end_of_ag = 0;
@@ -395,9 +457,9 @@ xfs_bulkstat(
                                                gcnt++;
                                }
                                gfree |= XFS_INOBT_MASKN(0, chunkidx);
-                               INT_SET(irbp->ir_startino, ARCH_CONVERT, gino);
-                               INT_SET(irbp->ir_freecount, ARCH_CONVERT, gcnt);
-                               INT_SET(irbp->ir_free, ARCH_CONVERT, gfree);
+                               irbp->ir_startino = gino;
+                               irbp->ir_freecount = gcnt;
+                               irbp->ir_free = gfree;
                                irbp++;
                                agino = gino + XFS_INODES_PER_CHUNK;
                                icount = XFS_INODES_PER_CHUNK - gcnt;
@@ -451,11 +513,27 @@ xfs_bulkstat(
                        }
                        /*
                         * If this chunk has any allocated inodes, save it.
+                        * Also start read-ahead now for this chunk.
                         */
                        if (gcnt < XFS_INODES_PER_CHUNK) {
-                               INT_SET(irbp->ir_startino, ARCH_CONVERT, gino);
-                               INT_SET(irbp->ir_freecount, ARCH_CONVERT, gcnt);
-                               INT_SET(irbp->ir_free, ARCH_CONVERT, gfree);
+                               /*
+                                * Loop over all clusters in the next chunk.
+                                * Do a readahead if there are any allocated
+                                * inodes in that cluster.
+                                */
+                               for (agbno = XFS_AGINO_TO_AGBNO(mp, gino),
+                                    chunkidx = 0;
+                                    chunkidx < XFS_INODES_PER_CHUNK;
+                                    chunkidx += nicluster,
+                                    agbno += nbcluster) {
+                                       if (XFS_INOBT_MASKN(chunkidx,
+                                                           nicluster) & ~gfree)
+                                               xfs_btree_reada_bufs(mp, agno,
+                                                       agbno, nbcluster);
+                               }
+                               irbp->ir_startino = gino;
+                               irbp->ir_freecount = gcnt;
+                               irbp->ir_free = gfree;
                                irbp++;
                                icount += XFS_INODES_PER_CHUNK - gcnt;
                        }
@@ -478,34 +556,12 @@ xfs_bulkstat(
                irbufend = irbp;
                for (irbp = irbuf;
                     irbp < irbufend && ubleft >= statstruct_size; irbp++) {
-                       /*
-                        * Read-ahead the next chunk's worth of inodes.
-                        */
-                       if (&irbp[1] < irbufend) {
-                               /*
-                                * Loop over all clusters in the next chunk.
-                                * Do a readahead if there are any allocated
-                                * inodes in that cluster.
-                                */
-                               for (agbno = XFS_AGINO_TO_AGBNO(mp,
-                                                       INT_GET(irbp[1].ir_startino, ARCH_CONVERT)),
-                                    chunkidx = 0;
-                                    chunkidx < XFS_INODES_PER_CHUNK;
-                                    chunkidx += nicluster,
-                                    agbno += nbcluster) {
-                                       if (XFS_INOBT_MASKN(chunkidx,
-                                                           nicluster) &
-                                           ~(INT_GET(irbp[1].ir_free, ARCH_CONVERT)))
-                                               xfs_btree_reada_bufs(mp, agno,
-                                                       agbno, nbcluster);
-                               }
-                       }
                        /*
                         * Now process this chunk of inodes.
                         */
-                       for (agino = INT_GET(irbp->ir_startino, ARCH_CONVERT), chunkidx = 0, clustidx = 0;
+                       for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
                             ubleft > 0 &&
-                               INT_GET(irbp->ir_freecount, ARCH_CONVERT) < XFS_INODES_PER_CHUNK;
+                               irbp->ir_freecount < XFS_INODES_PER_CHUNK;
                             chunkidx++, clustidx++, agino++) {
                                ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
                                /*
@@ -525,11 +581,12 @@ xfs_bulkstat(
                                 */
                                if ((chunkidx & (nicluster - 1)) == 0) {
                                        agbno = XFS_AGINO_TO_AGBNO(mp,
-                                                       INT_GET(irbp->ir_startino, ARCH_CONVERT)) +
+                                                       irbp->ir_startino) +
                                                ((chunkidx & nimask) >>
                                                 mp->m_sb.sb_inopblog);
 
-                                       if (flags & BULKSTAT_FG_QUICK) {
+                                       if (flags & (BULKSTAT_FG_QUICK |
+                                                    BULKSTAT_FG_INLINE)) {
                                                ino = XFS_AGINO_TO_INO(mp, agno,
                                                                       agino);
                                                bno = XFS_AGB_TO_DADDR(mp, agno,
@@ -543,6 +600,7 @@ xfs_bulkstat(
                                                                      KM_SLEEP);
                                                ip->i_ino = ino;
                                                ip->i_mount = mp;
+                                               spin_lock_init(&ip->i_flags_lock);
                                                if (bp)
                                                        xfs_buf_relse(bp);
                                                error = xfs_itobp(mp, NULL, ip,
@@ -564,30 +622,34 @@ xfs_bulkstat(
                                /*
                                 * Skip if this inode is free.
                                 */
-                               if (XFS_INOBT_MASK(chunkidx) & INT_GET(irbp->ir_free, ARCH_CONVERT))
+                               if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
                                        continue;
                                /*
                                 * Count used inodes as free so we can tell
                                 * when the chunk is used up.
                                 */
-                               INT_MOD(irbp->ir_freecount, ARCH_CONVERT, +1);
+                               irbp->ir_freecount++;
                                ino = XFS_AGINO_TO_INO(mp, agno, agino);
                                bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
-                               if (flags & BULKSTAT_FG_QUICK) {
-                                       dip = (xfs_dinode_t *)xfs_buf_offset(bp,
-                                             (clustidx << mp->m_sb.sb_inodelog));
-
-                                       if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT)
-                                                   != XFS_DINODE_MAGIC
-                                           || !XFS_DINODE_GOOD_VERSION(
-                                                   INT_GET(dip->di_core.di_version, ARCH_CONVERT)))
-                                               continue;
+                               if (!xfs_bulkstat_use_dinode(mp, flags, bp,
+                                                            clustidx, &dip))
+                                       continue;
+                               /*
+                                * If we need to do an iget, cannot hold bp.
+                                * Drop it, until starting the next cluster.
+                                */
+                               if ((flags & BULKSTAT_FG_INLINE) && !dip) {
+                                       if (bp)
+                                               xfs_buf_relse(bp);
+                                       bp = NULL;
                                }
 
                                /*
                                 * Get the inode and fill in a single buffer.
                                 * BULKSTAT_FG_QUICK uses dip to fill it in.
                                 * BULKSTAT_FG_IGET uses igets.
+                                * BULKSTAT_FG_INLINE uses dip if we have an
+                                * inline attr fork, else igets.
                                 * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
                                 * This is also used to count inodes/blks, etc
                                 * in xfs_qm_quotacheck.
@@ -597,8 +659,15 @@ xfs_bulkstat(
                                                ubleft, private_data,
                                                bno, &ubused, dip, &fmterror);
                                if (fmterror == BULKSTAT_RV_NOTHING) {
-                                       if (error == ENOMEM)
+                                        if (error == EFAULT) {
+                                                ubleft = 0;
+                                                rval = error;
+                                                break;
+                                        }
+                                       else if (error == ENOMEM)
                                                ubleft = 0;
+                                       else
+                                               lastino = ino;
                                        continue;
                                }
                                if (fmterror == BULKSTAT_RV_GIVEUP) {
@@ -633,7 +702,7 @@ xfs_bulkstat(
        /*
         * Done, we're either out of filesystem or space to put the data.
         */
-       kmem_free(irbuf, NBPC);
+       kmem_free(irbuf, irbsize);
        *ubcountp = ubelem;
        if (agno >= mp->m_sb.sb_agcount) {
                /*
@@ -698,6 +767,19 @@ xfs_bulkstat_single(
        return 0;
 }
 
+int
+xfs_inumbers_fmt(
+       void                    __user *ubuffer, /* buffer to write to */
+       const xfs_inogrp_t      *buffer,        /* buffer to read from */
+       long                    count,          /* # of elements to read */
+       long                    *written)       /* # of bytes written */
+{
+       if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer)))
+               return -EFAULT;
+       *written = count * sizeof(*buffer);
+       return 0;
+}
+
 /*
  * Return inode number table for the filesystem.
  */
@@ -706,7 +788,8 @@ xfs_inumbers(
        xfs_mount_t     *mp,            /* mount point for filesystem */
        xfs_ino_t       *lastino,       /* last inode returned */
        int             *count,         /* size of buffer/count returned */
-       xfs_inogrp_t    __user *ubuffer)/* buffer with inode descriptions */
+       void            __user *ubuffer,/* buffer with inode descriptions */
+       inumbers_fmt_pf formatter)
 {
        xfs_buf_t       *agbp;
        xfs_agino_t     agino;
@@ -759,7 +842,7 @@ xfs_inumbers(
                                xfs_buf_relse(agbp);
                                agbp = NULL;
                                /*
-                                * Move up the the last inode in the current
+                                * Move up the last inode in the current
                                 * chunk.  The lookup_ge will always get
                                 * us the first inode in the next chunk.
                                 */
@@ -785,12 +868,12 @@ xfs_inumbers(
                bufidx++;
                left--;
                if (bufidx == bcount) {
-                       if (copy_to_user(ubuffer, buffer,
-                                       bufidx * sizeof(*buffer))) {
+                       long written;
+                       if (formatter(ubuffer, buffer, bufidx, &written)) {
                                error = XFS_ERROR(EFAULT);
                                break;
                        }
-                       ubuffer += bufidx;
+                       ubuffer += written;
                        *count += bufidx;
                        bufidx = 0;
                }
@@ -812,8 +895,8 @@ xfs_inumbers(
        }
        if (!error) {
                if (bufidx) {
-                       if (copy_to_user(ubuffer, buffer,
-                                       bufidx * sizeof(*buffer)))
+                       long written;
+                       if (formatter(ubuffer, buffer, bufidx, &written))
                                error = XFS_ERROR(EFAULT);
                        else
                                *count += bufidx;