ocfs2: return the physical address in ocfs2_write_cluster
[linux-2.6-block.git] / fs / ocfs2 / aops.c
index 043110e5212dd8c6ab2e7118ac8f17746448dac0..7b268c357cf34c20b4b6dc2da0b74d7b16df0e0c 100644 (file)
@@ -1212,7 +1212,7 @@ struct ocfs2_write_cluster_desc {
         * filled.
         */
        unsigned        c_new;
-       unsigned        c_unwritten;
+       unsigned        c_clear_unwritten;
        unsigned        c_needs_zero;
 };
 
@@ -1224,6 +1224,9 @@ struct ocfs2_write_ctxt {
        /* First cluster allocated in a nonsparse extend */
        u32                             w_first_new_cpos;
 
+       /* Type of caller. Must be one of buffer, mmap, direct. */
+       ocfs2_write_type_t              w_type;
+
        struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
 
        /*
@@ -1319,7 +1322,8 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
 
 static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
                                  struct ocfs2_super *osb, loff_t pos,
-                                 unsigned len, struct buffer_head *di_bh)
+                                 unsigned len, ocfs2_write_type_t type,
+                                 struct buffer_head *di_bh)
 {
        u32 cend;
        struct ocfs2_write_ctxt *wc;
@@ -1334,6 +1338,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
        wc->w_clen = cend - wc->w_cpos + 1;
        get_bh(di_bh);
        wc->w_di_bh = di_bh;
+       wc->w_type = type;
 
        if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits))
                wc->w_large_pages = 1;
@@ -1401,12 +1406,13 @@ static void ocfs2_write_failure(struct inode *inode,
                to = user_pos + user_len;
        struct page *tmppage;
 
-       ocfs2_zero_new_buffers(wc->w_target_page, from, to);
+       if (wc->w_target_page)
+               ocfs2_zero_new_buffers(wc->w_target_page, from, to);
 
        for(i = 0; i < wc->w_num_pages; i++) {
                tmppage = wc->w_pages[i];
 
-               if (page_has_buffers(tmppage)) {
+               if (tmppage && page_has_buffers(tmppage)) {
                        if (ocfs2_should_order_data(inode))
                                ocfs2_jbd2_file_inode(wc->w_handle, inode);
 
@@ -1536,11 +1542,13 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
                wc->w_num_pages = 1;
                start = target_index;
        }
+       end_index = (user_pos + user_len - 1) >> PAGE_CACHE_SHIFT;
 
        for(i = 0; i < wc->w_num_pages; i++) {
                index = start + i;
 
-               if (index == target_index && mmap_page) {
+               if (index >= target_index && index <= end_index &&
+                   wc->w_type == OCFS2_WRITE_MMAP) {
                        /*
                         * ocfs2_pagemkwrite() is a little different
                         * and wants us to directly use the page
@@ -1559,6 +1567,11 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
                        page_cache_get(mmap_page);
                        wc->w_pages[i] = mmap_page;
                        wc->w_target_locked = true;
+               } else if (index >= target_index && index <= end_index &&
+                          wc->w_type == OCFS2_WRITE_DIRECT) {
+                       /* Direct write has no mapping page. */
+                       wc->w_pages[i] = NULL;
+                       continue;
                } else {
                        wc->w_pages[i] = find_or_create_page(mapping, index,
                                                             GFP_NOFS);
@@ -1583,19 +1596,20 @@ out:
  * Prepare a single cluster for write one cluster into the file.
  */
 static int ocfs2_write_cluster(struct address_space *mapping,
-                              u32 phys, unsigned int unwritten,
+                              u32 *phys, unsigned int new,
+                              unsigned int clear_unwritten,
                               unsigned int should_zero,
                               struct ocfs2_alloc_context *data_ac,
                               struct ocfs2_alloc_context *meta_ac,
                               struct ocfs2_write_ctxt *wc, u32 cpos,
                               loff_t user_pos, unsigned user_len)
 {
-       int ret, i, new;
-       u64 v_blkno, p_blkno;
+       int ret, i;
+       u64 p_blkno;
        struct inode *inode = mapping->host;
        struct ocfs2_extent_tree et;
+       int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
 
-       new = phys == 0 ? 1 : 0;
        if (new) {
                u32 tmp_pos;
 
@@ -1605,9 +1619,9 @@ static int ocfs2_write_cluster(struct address_space *mapping,
                 */
                tmp_pos = cpos;
                ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode,
-                                          &tmp_pos, 1, 0, wc->w_di_bh,
-                                          wc->w_handle, data_ac,
-                                          meta_ac, NULL);
+                                          &tmp_pos, 1, !clear_unwritten,
+                                          wc->w_di_bh, wc->w_handle,
+                                          data_ac, meta_ac, NULL);
                /*
                 * This shouldn't happen because we must have already
                 * calculated the correct meta data allocation required. The
@@ -1624,11 +1638,11 @@ static int ocfs2_write_cluster(struct address_space *mapping,
                        mlog_errno(ret);
                        goto out;
                }
-       } else if (unwritten) {
+       } else if (clear_unwritten) {
                ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
                                              wc->w_di_bh);
                ret = ocfs2_mark_extent_written(inode, &et,
-                                               wc->w_handle, cpos, 1, phys,
+                                               wc->w_handle, cpos, 1, *phys,
                                                meta_ac, &wc->w_dealloc);
                if (ret < 0) {
                        mlog_errno(ret);
@@ -1636,30 +1650,33 @@ static int ocfs2_write_cluster(struct address_space *mapping,
                }
        }
 
-       if (should_zero)
-               v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos);
-       else
-               v_blkno = user_pos >> inode->i_sb->s_blocksize_bits;
-
        /*
         * The only reason this should fail is due to an inability to
         * find the extent added.
         */
-       ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,
-                                         NULL);
+       ret = ocfs2_get_clusters(inode, cpos, phys, NULL, NULL);
        if (ret < 0) {
                mlog(ML_ERROR, "Get physical blkno failed for inode %llu, "
-                           "at logical block %llu",
-                           (unsigned long long)OCFS2_I(inode)->ip_blkno,
-                           (unsigned long long)v_blkno);
+                           "at logical cluster %u",
+                           (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos);
                goto out;
        }
 
-       BUG_ON(p_blkno == 0);
+       BUG_ON(*phys == 0);
+
+       p_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *phys);
+       if (!should_zero)
+               p_blkno += (user_pos >> inode->i_sb->s_blocksize_bits) & (u64)(bpc - 1);
 
        for(i = 0; i < wc->w_num_pages; i++) {
                int tmpret;
 
+               /* This is the direct io target page. */
+               if (wc->w_pages[i] == NULL) {
+                       p_blkno++;
+                       continue;
+               }
+
                tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,
                                                      wc->w_pages[i], cpos,
                                                      user_pos, user_len,
@@ -1706,8 +1723,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
                if ((cluster_off + local_len) > osb->s_clustersize)
                        local_len = osb->s_clustersize - cluster_off;
 
-               ret = ocfs2_write_cluster(mapping, desc->c_phys,
-                                         desc->c_unwritten,
+               ret = ocfs2_write_cluster(mapping, &desc->c_phys,
+                                         desc->c_new,
+                                         desc->c_clear_unwritten,
                                          desc->c_needs_zero,
                                          data_ac, meta_ac,
                                          wc, desc->c_cpos, pos, local_len);
@@ -1852,11 +1870,12 @@ static int ocfs2_populate_write_desc(struct inode *inode,
                if (phys == 0) {
                        desc->c_new = 1;
                        desc->c_needs_zero = 1;
+                       desc->c_clear_unwritten = 1;
                        *clusters_to_alloc = *clusters_to_alloc + 1;
                }
 
                if (ext_flags & OCFS2_EXT_UNWRITTEN) {
-                       desc->c_unwritten = 1;
+                       desc->c_clear_unwritten = 1;
                        desc->c_needs_zero = 1;
                }
 
@@ -2022,8 +2041,10 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode,
        if (ret)
                mlog_errno(ret);
 
-       wc->w_first_new_cpos =
-               ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
+       /* There is no wc if this is called from direct io. */
+       if (wc)
+               wc->w_first_new_cpos =
+                       ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
 
        return ret;
 }
@@ -2077,9 +2098,8 @@ out:
        return ret;
 }
 
-int ocfs2_write_begin_nolock(struct file *filp,
-                            struct address_space *mapping,
-                            loff_t pos, unsigned len, unsigned flags,
+int ocfs2_write_begin_nolock(struct address_space *mapping,
+                            loff_t pos, unsigned len, ocfs2_write_type_t type,
                             struct page **pagep, void **fsdata,
                             struct buffer_head *di_bh, struct page *mmap_page)
 {
@@ -2096,7 +2116,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
        int try_free = 1, ret1;
 
 try_again:
-       ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
+       ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, type, di_bh);
        if (ret) {
                mlog_errno(ret);
                return ret;
@@ -2115,14 +2135,17 @@ try_again:
                }
        }
 
-       if (ocfs2_sparse_alloc(osb))
-               ret = ocfs2_zero_tail(inode, di_bh, pos);
-       else
-               ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
-                                                  wc);
-       if (ret) {
-               mlog_errno(ret);
-               goto out;
+       /* Direct io changes i_size late, so do not zero the tail here. */
+       if (type != OCFS2_WRITE_DIRECT) {
+               if (ocfs2_sparse_alloc(osb))
+                       ret = ocfs2_zero_tail(inode, di_bh, pos);
+               else
+                       ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos,
+                                                          len, wc);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
        }
 
        ret = ocfs2_check_range_for_refcount(inode, pos, len);
@@ -2153,7 +2176,7 @@ try_again:
                        (unsigned long long)OCFS2_I(inode)->ip_blkno,
                        (long long)i_size_read(inode),
                        le32_to_cpu(di->i_clusters),
-                       pos, len, flags, mmap_page,
+                       pos, len, type, mmap_page,
                        clusters_to_alloc, extents_to_split);
 
        /*
@@ -2183,8 +2206,9 @@ try_again:
 
                credits = ocfs2_calc_extend_credits(inode->i_sb,
                                                    &di->id2.i_list);
-
-       }
+       } else if (type == OCFS2_WRITE_DIRECT)
+               /* Direct write needs no trans if no extents are allocated. */
+               goto success;
 
        /*
         * We have to zero sparse allocated clusters, unwritten extent clusters,
@@ -2260,7 +2284,8 @@ try_again:
                ocfs2_free_alloc_context(meta_ac);
 
 success:
-       *pagep = wc->w_target_page;
+       if (pagep)
+               *pagep = wc->w_target_page;
        *fsdata = wc;
        return 0;
 out_quota:
@@ -2323,8 +2348,8 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
         */
        down_write(&OCFS2_I(inode)->ip_alloc_sem);
 
-       ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep,
-                                      fsdata, di_bh, NULL);
+       ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_BUFFER,
+                                      pagep, fsdata, di_bh, NULL);
        if (ret) {
                mlog_errno(ret);
                goto out_fail;
@@ -2381,12 +2406,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
        handle_t *handle = wc->w_handle;
        struct page *tmppage;
 
-       ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
-                       OCFS2_JOURNAL_ACCESS_WRITE);
-       if (ret) {
-               copied = ret;
-               mlog_errno(ret);
-               goto out;
+       if (handle) {
+               ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
+                               wc->w_di_bh, OCFS2_JOURNAL_ACCESS_WRITE);
+               if (ret) {
+                       copied = ret;
+                       mlog_errno(ret);
+                       goto out;
+               }
        }
 
        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
@@ -2394,18 +2421,23 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
                goto out_write_size;
        }
 
-       if (unlikely(copied < len)) {
+       if (unlikely(copied < len) && wc->w_target_page) {
                if (!PageUptodate(wc->w_target_page))
                        copied = 0;
 
                ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
                                       start+len);
        }
-       flush_dcache_page(wc->w_target_page);
+       if (wc->w_target_page)
+               flush_dcache_page(wc->w_target_page);
 
        for(i = 0; i < wc->w_num_pages; i++) {
                tmppage = wc->w_pages[i];
 
+               /* This is the direct io target page. */
+               if (tmppage == NULL)
+                       continue;
+
                if (tmppage == wc->w_target_page) {
                        from = wc->w_target_from;
                        to = wc->w_target_to;
@@ -2424,25 +2456,29 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
                }
 
                if (page_has_buffers(tmppage)) {
-                       if (ocfs2_should_order_data(inode))
-                               ocfs2_jbd2_file_inode(wc->w_handle, inode);
+                       if (handle && ocfs2_should_order_data(inode))
+                               ocfs2_jbd2_file_inode(handle, inode);
                        block_commit_write(tmppage, from, to);
                }
        }
 
 out_write_size:
-       pos += copied;
-       if (pos > i_size_read(inode)) {
-               i_size_write(inode, pos);
-               mark_inode_dirty(inode);
-       }
-       inode->i_blocks = ocfs2_inode_sector_count(inode);
-       di->i_size = cpu_to_le64((u64)i_size_read(inode));
-       inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-       di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
-       di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
-       ocfs2_update_inode_fsync_trans(handle, inode, 1);
-       ocfs2_journal_dirty(handle, wc->w_di_bh);
+       /* Direct io does not update i_size here. */
+       if (wc->w_type != OCFS2_WRITE_DIRECT) {
+               pos += copied;
+               if (pos > i_size_read(inode)) {
+                       i_size_write(inode, pos);
+                       mark_inode_dirty(inode);
+               }
+               inode->i_blocks = ocfs2_inode_sector_count(inode);
+               di->i_size = cpu_to_le64((u64)i_size_read(inode));
+               inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+               di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
+               di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+               ocfs2_update_inode_fsync_trans(handle, inode, 1);
+       }
+       if (handle)
+               ocfs2_journal_dirty(handle, wc->w_di_bh);
 
 out:
        /* unlock pages before dealloc since it needs acquiring j_trans_barrier
@@ -2452,7 +2488,8 @@ out:
         */
        ocfs2_unlock_pages(wc);
 
-       ocfs2_commit_trans(osb, handle);
+       if (handle)
+               ocfs2_commit_trans(osb, handle);
 
        ocfs2_run_deallocs(osb, &wc->w_dealloc);