static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
.ra_pages = 0, /* No readahead */
- .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+ .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
.unplug_io_fn = default_unplug_io_fn,
};
error = 1;
if (!inode)
goto out;
- /* Precharge page while we can wait, compensate afterwards */
+ /* Precharge page using GFP_KERNEL while we can wait */
error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
if (error)
goto out;
error = radix_tree_preload(GFP_KERNEL);
- if (error)
- goto uncharge;
+ if (error) {
+ mem_cgroup_uncharge_cache_page(page);
+ goto out;
+ }
error = 1;
spin_lock(&info->lock);
ptr = shmem_swp_entry(info, idx, NULL);
- if (ptr && ptr->val == entry.val)
- error = add_to_page_cache(page, inode->i_mapping,
+ if (ptr && ptr->val == entry.val) {
+ error = add_to_page_cache_locked(page, inode->i_mapping,
idx, GFP_NOWAIT);
+ /* does mem_cgroup_uncharge_cache_page on error */
+ } else /* we must compensate for our precharge above */
+ mem_cgroup_uncharge_cache_page(page);
+
if (error == -EEXIST) {
struct page *filepage = find_get_page(inode->i_mapping, idx);
error = 1;
shmem_swp_unmap(ptr);
spin_unlock(&info->lock);
radix_tree_preload_end();
-uncharge:
- mem_cgroup_uncharge_page(page);
out:
unlock_page(page);
page_cache_release(page);
#ifdef CONFIG_NUMA
#ifdef CONFIG_TMPFS
-static int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
+static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
{
- char *nodelist = strchr(value, ':');
- int err = 1;
+ char buffer[64];
- if (nodelist) {
- /* NUL-terminate policy string */
- *nodelist++ = '\0';
- if (nodelist_parse(nodelist, *policy_nodes))
- goto out;
- if (!nodes_subset(*policy_nodes, node_states[N_HIGH_MEMORY]))
- goto out;
- }
- if (!strcmp(value, "default")) {
- *policy = MPOL_DEFAULT;
- /* Don't allow a nodelist */
- if (!nodelist)
- err = 0;
- } else if (!strcmp(value, "prefer")) {
- *policy = MPOL_PREFERRED;
- /* Insist on a nodelist of one node only */
- if (nodelist) {
- char *rest = nodelist;
- while (isdigit(*rest))
- rest++;
- if (!*rest)
- err = 0;
- }
- } else if (!strcmp(value, "bind")) {
- *policy = MPOL_BIND;
- /* Insist on a nodelist */
- if (nodelist)
- err = 0;
- } else if (!strcmp(value, "interleave")) {
- *policy = MPOL_INTERLEAVE;
- /*
- * Default to online nodes with memory if no nodelist
- */
- if (!nodelist)
- *policy_nodes = node_states[N_HIGH_MEMORY];
- err = 0;
- }
-out:
- /* Restore string for error message */
- if (nodelist)
- *--nodelist = ':';
- return err;
-}
+ if (!mpol || mpol->mode == MPOL_DEFAULT)
+ return; /* show nothing */
-static void shmem_show_mpol(struct seq_file *seq, int policy,
- const nodemask_t policy_nodes)
-{
- char *policy_string;
+ mpol_to_str(buffer, sizeof(buffer), mpol, 1);
- switch (policy) {
- case MPOL_PREFERRED:
- policy_string = "prefer";
- break;
- case MPOL_BIND:
- policy_string = "bind";
- break;
- case MPOL_INTERLEAVE:
- policy_string = "interleave";
- break;
- default:
- /* MPOL_DEFAULT */
- return;
- }
-
- seq_printf(seq, ",mpol=%s", policy_string);
-
- if (policy != MPOL_INTERLEAVE ||
- !nodes_equal(policy_nodes, node_states[N_HIGH_MEMORY])) {
- char buffer[64];
- int len;
+ seq_printf(seq, ",mpol=%s", buffer);
+}
- len = nodelist_scnprintf(buffer, sizeof(buffer), policy_nodes);
- if (len < sizeof(buffer))
- seq_printf(seq, ":%s", buffer);
- else
- seq_printf(seq, ":?");
+static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
+{
+ struct mempolicy *mpol = NULL;
+ if (sbinfo->mpol) {
+ spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */
+ mpol = sbinfo->mpol;
+ mpol_get(mpol);
+ spin_unlock(&sbinfo->stat_lock);
}
+ return mpol;
}
#endif /* CONFIG_TMPFS */
static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
struct shmem_inode_info *info, unsigned long idx)
{
+ struct mempolicy mpol, *spol;
struct vm_area_struct pvma;
struct page *page;
+ spol = mpol_cond_copy(&mpol,
+ mpol_shared_policy_lookup(&info->policy, idx));
+
/* Create a pseudo vma that just contains the policy */
pvma.vm_start = 0;
pvma.vm_pgoff = idx;
pvma.vm_ops = NULL;
- pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
+ pvma.vm_policy = spol;
page = swapin_readahead(entry, gfp, &pvma, 0);
- mpol_free(pvma.vm_policy);
return page;
}
struct shmem_inode_info *info, unsigned long idx)
{
struct vm_area_struct pvma;
- struct page *page;
/* Create a pseudo vma that just contains the policy */
pvma.vm_start = 0;
pvma.vm_pgoff = idx;
pvma.vm_ops = NULL;
pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
- page = alloc_page_vma(gfp, &pvma, 0);
- mpol_free(pvma.vm_policy);
- return page;
+
+ /*
+ * alloc_page_vma() will drop the shared policy reference
+ */
+ return alloc_page_vma(gfp, &pvma, 0);
}
#else /* !CONFIG_NUMA */
#ifdef CONFIG_TMPFS
-static inline int shmem_parse_mpol(char *value, int *policy,
- nodemask_t *policy_nodes)
-{
- return 1;
-}
-
-static inline void shmem_show_mpol(struct seq_file *seq, int policy,
- const nodemask_t policy_nodes)
+static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p)
{
}
#endif /* CONFIG_TMPFS */
}
#endif /* CONFIG_NUMA */
+#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
+static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
+{
+ return NULL;
+}
+#endif
+
/*
* shmem_getpage - either get the page from swap or allocate a new one
*
SetPageUptodate(filepage);
set_page_dirty(filepage);
swap_free(swap);
- } else if (!(error = add_to_page_cache(
- swappage, mapping, idx, GFP_NOWAIT))) {
+ } else if (!(error = add_to_page_cache_locked(swappage, mapping,
+ idx, GFP_NOWAIT))) {
info->flags |= SHMEM_PAGEIN;
shmem_swp_set(info, entry, 0);
shmem_swp_unmap(entry);
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
unlock_page(swappage);
+ page_cache_release(swappage);
if (error == -ENOMEM) {
/* allow reclaim from this memory cgroup */
- error = mem_cgroup_cache_charge(swappage,
- current->mm, gfp & ~__GFP_HIGHMEM);
- if (error) {
- page_cache_release(swappage);
+ error = mem_cgroup_shrink_usage(current->mm,
+ gfp);
+ if (error)
goto failed;
- }
- mem_cgroup_uncharge_page(swappage);
}
- page_cache_release(swappage);
goto repeat;
}
} else if (sgp == SGP_READ && !filepage) {
}
if (!filepage) {
+ int ret;
+
spin_unlock(&info->lock);
filepage = shmem_alloc_page(gfp, info, idx);
if (!filepage) {
swap = *entry;
shmem_swp_unmap(entry);
}
- if (error || swap.val || 0 != add_to_page_cache_lru(
- filepage, mapping, idx, GFP_NOWAIT)) {
+ ret = error || swap.val;
+ if (ret)
+ mem_cgroup_uncharge_cache_page(filepage);
+ else
+ ret = add_to_page_cache_lru(filepage, mapping,
+ idx, GFP_NOWAIT);
+ /*
+ * At add_to_page_cache_lru() failure, uncharge will
+ * be done automatically.
+ */
+ if (ret) {
spin_unlock(&info->lock);
- mem_cgroup_uncharge_page(filepage);
page_cache_release(filepage);
shmem_unacct_blocks(info->flags, 1);
shmem_free_blocks(inode, 1);
goto failed;
goto repeat;
}
- mem_cgroup_uncharge_page(filepage);
info->flags |= SHMEM_PAGEIN;
}
case S_IFREG:
inode->i_op = &shmem_inode_operations;
inode->i_fop = &shmem_file_operations;
- mpol_shared_policy_init(&info->policy, sbinfo->policy,
- &sbinfo->policy_nodes);
+ mpol_shared_policy_init(&info->policy,
+ shmem_get_sbmpol(sbinfo));
break;
case S_IFDIR:
inc_nlink(inode);
* Must not load anything in the rbtree,
* mpol_free_shared_policy will not be called.
*/
- mpol_shared_policy_init(&info->policy, MPOL_DEFAULT,
- NULL);
+ mpol_shared_policy_init(&info->policy, NULL);
break;
}
} else
file_accessed(filp);
}
-static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
+static ssize_t shmem_file_aio_read(struct kiocb *iocb,
+ const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
- read_descriptor_t desc;
+ struct file *filp = iocb->ki_filp;
+ ssize_t retval;
+ unsigned long seg;
+ size_t count;
+ loff_t *ppos = &iocb->ki_pos;
- if ((ssize_t) count < 0)
- return -EINVAL;
- if (!access_ok(VERIFY_WRITE, buf, count))
- return -EFAULT;
- if (!count)
- return 0;
+ retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
+ if (retval)
+ return retval;
- desc.written = 0;
- desc.count = count;
- desc.arg.buf = buf;
- desc.error = 0;
+ for (seg = 0; seg < nr_segs; seg++) {
+ read_descriptor_t desc;
- do_shmem_file_read(filp, ppos, &desc, file_read_actor);
- if (desc.written)
- return desc.written;
- return desc.error;
+ desc.written = 0;
+ desc.arg.buf = iov[seg].iov_base;
+ desc.count = iov[seg].iov_len;
+ if (desc.count == 0)
+ continue;
+ desc.error = 0;
+ do_shmem_file_read(filp, ppos, &desc, file_read_actor);
+ retval += desc.written;
+ if (desc.error) {
+ retval = retval ?: desc.error;
+ break;
+ }
+ if (desc.count > 0)
+ break;
+ }
+ return retval;
}
static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
if (*rest)
goto bad_val;
} else if (!strcmp(this_char,"mpol")) {
- if (shmem_parse_mpol(value, &sbinfo->policy,
- &sbinfo->policy_nodes))
+ if (mpol_parse_str(value, &sbinfo->mpol, 1))
goto bad_val;
} else {
printk(KERN_ERR "tmpfs: Bad mount option %s\n",
sbinfo->free_blocks = config.max_blocks - blocks;
sbinfo->max_inodes = config.max_inodes;
sbinfo->free_inodes = config.max_inodes - inodes;
- sbinfo->policy = config.policy;
- sbinfo->policy_nodes = config.policy_nodes;
+
+ mpol_put(sbinfo->mpol);
+ sbinfo->mpol = config.mpol; /* transfers initial ref */
out:
spin_unlock(&sbinfo->stat_lock);
return error;
seq_printf(seq, ",uid=%u", sbinfo->uid);
if (sbinfo->gid != 0)
seq_printf(seq, ",gid=%u", sbinfo->gid);
- shmem_show_mpol(seq, sbinfo->policy, sbinfo->policy_nodes);
+ shmem_show_mpol(seq, sbinfo->mpol);
return 0;
}
#endif /* CONFIG_TMPFS */
sbinfo->mode = S_IRWXUGO | S_ISVTX;
sbinfo->uid = current->fsuid;
sbinfo->gid = current->fsgid;
- sbinfo->policy = MPOL_DEFAULT;
- sbinfo->policy_nodes = node_states[N_HIGH_MEMORY];
+ sbinfo->mpol = NULL;
sb->s_fs_info = sbinfo;
#ifdef CONFIG_TMPFS
kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
.mmap = shmem_mmap,
#ifdef CONFIG_TMPFS
.llseek = generic_file_llseek,
- .read = shmem_file_read,
+ .read = do_sync_read,
.write = do_sync_write,
+ .aio_read = shmem_file_aio_read,
.aio_write = generic_file_aio_write,
.fsync = simple_sync_file,
.splice_read = generic_file_splice_read,