+// SPDX-License-Identifier: GPL-2.0-only
/*
* This file implements the perfmon-2 subsystem which is used
* to program the IA-64 Performance Monitoring Unit (PMU).
#include <linux/smp.h>
#include <linux/pagemap.h>
#include <linux/mount.h>
+ #include <linux/pseudo_fs.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/rcupdate.h>
/* forward declaration */
static const struct dentry_operations pfmfs_dentry_operations;
- static struct dentry *
- pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
+ static int pfmfs_init_fs_context(struct fs_context *fc)
{
- return mount_pseudo(fs_type, "pfm:", NULL, &pfmfs_dentry_operations,
- PFMFS_MAGIC);
+ struct pseudo_fs_context *ctx = init_pseudo(fc, PFMFS_MAGIC);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->dops = &pfmfs_dentry_operations;
+ return 0;
}
static struct file_system_type pfm_fs_type = {
- .name = "pfmfs",
- .mount = pfmfs_mount,
- .kill_sb = kill_anon_super,
+ .name = "pfmfs",
+ .init_fs_context = pfmfs_init_fs_context,
+ .kill_sb = kill_anon_super,
};
MODULE_ALIAS_FS("pfmfs");
}
/* save the current system wide pmu states */
- ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1);
- if (ret) {
- DPRINT(("on_each_cpu() failed: %d\n", ret));
- goto cleanup_reserve;
- }
+ on_each_cpu(pfm_alt_save_pmu_state, NULL, 1);
/* officially change to the alternate interrupt handler */
pfm_alt_intr_handler = hdl;
pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
{
int i;
- int ret;
if (hdl == NULL) return -EINVAL;
pfm_alt_intr_handler = NULL;
- ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
- if (ret) {
- DPRINT(("on_each_cpu() failed: %d\n", ret));
- }
+ on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
for_each_online_cpu(i) {
pfm_unreserve_session(NULL, 1, i);
+// SPDX-License-Identifier: GPL-2.0-only
/*
* User interface for Resource Allocation in Resource Director Technology (RDT)
*
*
* Author: Fenghua Yu <fenghua.yu@intel.com>
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* More information about RDT can be found in the Intel (R) x86 Architecture
* Software Developer Manual.
*/
struct seq_file *seq, void *v)
{
struct rdt_resource *r = of->kn->parent->priv;
- u32 sw_shareable = 0, hw_shareable = 0;
- u32 exclusive = 0, pseudo_locked = 0;
+ /*
+ * Use unsigned long even though only 32 bits are used to ensure
+ * test_bit() is used safely.
+ */
+ unsigned long sw_shareable = 0, hw_shareable = 0;
+ unsigned long exclusive = 0, pseudo_locked = 0;
struct rdt_domain *dom;
int i, hwb, swb, excl, psl;
enum rdtgrp_mode mode;
}
for (i = r->cache.cbm_len - 1; i >= 0; i--) {
pseudo_locked = dom->plr ? dom->plr->cbm : 0;
- hwb = test_bit(i, (unsigned long *)&hw_shareable);
- swb = test_bit(i, (unsigned long *)&sw_shareable);
- excl = test_bit(i, (unsigned long *)&exclusive);
- psl = test_bit(i, (unsigned long *)&pseudo_locked);
+ hwb = test_bit(i, &hw_shareable);
+ swb = test_bit(i, &sw_shareable);
+ excl = test_bit(i, &exclusive);
+ psl = test_bit(i, &pseudo_locked);
if (hwb && swb)
seq_putc(seq, 'X');
else if (hwb && !swb)
ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
fc->fs_private = &ctx->kfc;
fc->ops = &rdt_fs_context_ops;
- if (fc->user_ns)
- put_user_ns(fc->user_ns);
+ put_user_ns(fc->user_ns);
fc->user_ns = get_user_ns(&init_user_ns);
fc->global = true;
return 0;
* modification to the CBM if the default does not satisfy the
* requirements.
*/
-static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
+static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
{
- /*
- * Convert the u32 _val to an unsigned long required by all the bit
- * operations within this function. No more than 32 bits of this
- * converted value can be accessed because all bit operations are
- * additionally provided with cbm_len that is initialized during
- * hardware enumeration using five bits from the EAX register and
- * thus never can exceed 32 bits.
- */
- unsigned long *val = (unsigned long *)_val;
unsigned int cbm_len = r->cache.cbm_len;
unsigned long first_bit, zero_bit;
+ unsigned long val = _val;
- if (*val == 0)
- return;
+ if (!val)
+ return 0;
- first_bit = find_first_bit(val, cbm_len);
- zero_bit = find_next_zero_bit(val, cbm_len, first_bit);
+ first_bit = find_first_bit(&val, cbm_len);
+ zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
/* Clear any remaining bits to ensure contiguous region */
- bitmap_clear(val, zero_bit, cbm_len - zero_bit);
+ bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
+ return (u32)val;
}
/*
if (closid_allocated(i) && i != closid) {
mode = rdtgroup_mode_by_closid(i);
if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
- break;
+ /*
+ * ctrl values for locksetup aren't relevant
+ * until the schemata is written, and the mode
+ * becomes RDT_MODE_PSEUDO_LOCKED.
+ */
+ continue;
/*
* If CDP is active include peer domain's
* usage to ensure there is no overlap
* Force the initial CBM to be valid, user can
* modify the CBM based on system availability.
*/
- cbm_ensure_valid(&d->new_ctrl, r);
+ d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r);
/*
* Assign the u32 CBM to an unsigned long to ensure that
* bitmap_weight() does not access out-of-bound memory.
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2017 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/mount.h>
+ #include <linux/pseudo_fs.h>
#include <linux/magic.h>
#include <linux/genhd.h>
#include <linux/pfn_t.h>
EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
#endif
-/**
- * __bdev_dax_supported() - Check if the device supports dax for filesystem
- * @bdev: block device to check
- * @blocksize: The block size of the device
- *
- * This is a library function for filesystems to check if the block device
- * can be mounted with dax option.
- *
- * Return: true if supported, false if unsupported
- */
-bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
+bool __generic_fsdax_supported(struct dax_device *dax_dev,
+ struct block_device *bdev, int blocksize, sector_t start,
+ sector_t sectors)
{
- struct dax_device *dax_dev;
bool dax_enabled = false;
pgoff_t pgoff, pgoff_end;
- struct request_queue *q;
char buf[BDEVNAME_SIZE];
void *kaddr, *end_kaddr;
pfn_t pfn, end_pfn;
return false;
}
- q = bdev_get_queue(bdev);
- if (!q || !blk_queue_dax(q)) {
- pr_debug("%s: error: request queue doesn't support dax\n",
- bdevname(bdev, buf));
- return false;
- }
-
- err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
+ err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
if (err) {
pr_debug("%s: error: unaligned partition for dax\n",
bdevname(bdev, buf));
return false;
}
- last_page = PFN_DOWN(i_size_read(bdev->bd_inode) - 1) * 8;
+ last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
if (err) {
pr_debug("%s: error: unaligned partition for dax\n",
return false;
}
- dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
- if (!dax_dev) {
- pr_debug("%s: error: device does not support dax\n",
- bdevname(bdev, buf));
- return false;
- }
-
id = dax_read_lock();
len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
dax_read_unlock(id);
- put_dax(dax_dev);
-
if (len < 1 || len2 < 1) {
pr_debug("%s: error: dax access failed (%ld)\n",
bdevname(bdev, buf), len < 1 ? len : len2);
}
return true;
}
+EXPORT_SYMBOL_GPL(__generic_fsdax_supported);
+
+/**
+ * __bdev_dax_supported() - Check if the device supports dax for filesystem
+ * @bdev: block device to check
+ * @blocksize: The block size of the device
+ *
+ * This is a library function for filesystems to check if the block device
+ * can be mounted with dax option.
+ *
+ * Return: true if supported, false if unsupported
+ */
+bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
+{
+ struct dax_device *dax_dev;
+ struct request_queue *q;
+ char buf[BDEVNAME_SIZE];
+ bool ret;
+ int id;
+
+ q = bdev_get_queue(bdev);
+ if (!q || !blk_queue_dax(q)) {
+ pr_debug("%s: error: request queue doesn't support dax\n",
+ bdevname(bdev, buf));
+ return false;
+ }
+
+ dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+ if (!dax_dev) {
+ pr_debug("%s: error: device does not support dax\n",
+ bdevname(bdev, buf));
+ return false;
+ }
+
+ id = dax_read_lock();
+ ret = dax_supported(dax_dev, bdev, blocksize, 0,
+ i_size_read(bdev->bd_inode) / 512);
+ dax_read_unlock(id);
+
+ put_dax(dax_dev);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(__bdev_dax_supported);
#endif
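A filesystem's mount path typically reaches this check through the bdev_dax_supported() wrapper in <linux/dax.h>; a minimal hedged sketch of such a caller (the function name and error handling are placeholders, not part of the patch):

#include <linux/dax.h>
#include <linux/fs.h>

static int example_setup_dax(struct super_block *sb)
{
	/* Refuse "-o dax" if the backing bdev cannot do DAX at this block size. */
	if (!bdev_dax_supported(sb->s_bdev, sb->s_blocksize))
		return -EINVAL;
	return 0;
}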
DAXDEV_ALIVE,
/* gate whether dax_flush() calls the low level flush routine */
DAXDEV_WRITE_CACHE,
+ /* flag to check if device supports synchronous flush */
+ DAXDEV_SYNC,
};
/**
}
EXPORT_SYMBOL_GPL(dax_direct_access);
+bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
+ int blocksize, sector_t start, sector_t len)
+{
+ if (!dax_alive(dax_dev))
+ return false;
+
+ return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len);
+}
+
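A DAX-capable block driver is expected to wire up the new ->dax_supported callback; a hedged sketch, assuming the generic_fsdax_supported() wrapper around the helper exported earlier in this series, with the remaining callbacks as placeholder names:

static const struct dax_operations example_pmem_dax_ops = {
	.direct_access = example_dax_direct_access,
	.dax_supported = generic_fsdax_supported,
	.copy_from_iter = example_dax_copy_from_iter,
	.copy_to_iter = example_dax_copy_to_iter,
};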
size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i)
{
}
EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
+bool __dax_synchronous(struct dax_device *dax_dev)
+{
+ return test_bit(DAXDEV_SYNC, &dax_dev->flags);
+}
+EXPORT_SYMBOL_GPL(__dax_synchronous);
+
+void __set_dax_synchronous(struct dax_device *dax_dev)
+{
+ set_bit(DAXDEV_SYNC, &dax_dev->flags);
+}
+EXPORT_SYMBOL_GPL(__set_dax_synchronous);
+
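A hedged sketch of how a hypothetical driver could use the new flag and helpers: pass DAXDEV_F_SYNC to alloc_dax() when CPU stores are persistent without an explicit flush, and let consumers query that property via dax_synchronous(). All names below are placeholders:

#include <linux/dax.h>
#include <linux/printk.h>

static const struct dax_operations example_dax_ops; /* driver callbacks elided */

static int example_register_dax(void *drvdata, bool synchronous)
{
	struct dax_device *dax_dev;

	dax_dev = alloc_dax(drvdata, "example0", &example_dax_ops,
			    synchronous ? DAXDEV_F_SYNC : 0);
	if (!dax_dev)
		return -ENOMEM;

	/* A consumer (filesystem, device-mapper target, ...) can later ask: */
	if (dax_synchronous(dax_dev))
		pr_info("example0: synchronous DAX\n");

	put_dax(dax_dev);
	return 0;
}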
bool dax_alive(struct dax_device *dax_dev)
{
lockdep_assert_held(&dax_srcu);
.drop_inode = generic_delete_inode,
};
- static struct dentry *dax_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int dax_init_fs_context(struct fs_context *fc)
{
- return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC);
+ struct pseudo_fs_context *ctx = init_pseudo(fc, DAXFS_MAGIC);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->ops = &dax_sops;
+ return 0;
}
static struct file_system_type dax_fs_type = {
- .name = "dax",
- .mount = dax_mount,
- .kill_sb = kill_anon_super,
+ .name = "dax",
+ .init_fs_context = dax_init_fs_context,
+ .kill_sb = kill_anon_super,
};
static int dax_test(struct inode *inode, void *data)
}
struct dax_device *alloc_dax(void *private, const char *__host,
- const struct dax_operations *ops)
+ const struct dax_operations *ops, unsigned long flags)
{
struct dax_device *dax_dev;
const char *host;
dax_add_host(dax_dev, host);
dax_dev->ops = ops;
dax_dev->private = private;
+ if (flags & DAXDEV_F_SYNC)
+ set_dax_synchronous(dax_dev);
+
return dax_dev;
err_dev:
if (!dax_cache)
return -ENOMEM;
- rc = register_filesystem(&dax_fs_type);
- if (rc)
- goto err_register_fs;
-
dax_mnt = kern_mount(&dax_fs_type);
if (IS_ERR(dax_mnt)) {
rc = PTR_ERR(dax_mnt);
return 0;
err_mount:
- unregister_filesystem(&dax_fs_type);
- err_register_fs:
kmem_cache_destroy(dax_cache);
return rc;
static void dax_fs_exit(void)
{
kern_unmount(dax_mnt);
- unregister_filesystem(&dax_fs_type);
kmem_cache_destroy(dax_cache);
}
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Framework for buffer objects that can be shared across devices/subsystems.
*
* Arnd Bergmann <arnd@arndb.de>, Rob Clark <rob@ti.com> and
* Daniel Vetter <daniel@ffwll.ch> for their support in creation and
* refining of this idea.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/reservation.h>
#include <linux/mm.h>
+#include <linux/mount.h>
+#include <linux/pseudo_fs.h>
#include <uapi/linux/dma-buf.h>
+#include <uapi/linux/magic.h>
static inline int is_dma_buf_file(struct file *);
static struct dma_buf_list db_list;
- static struct dentry *dma_buf_fs_mount(struct file_system_type *fs_type,
- int flags, const char *name, void *data)
+static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+ struct dma_buf *dmabuf;
+ char name[DMA_BUF_NAME_LEN];
+ size_t ret = 0;
+
+ dmabuf = dentry->d_fsdata;
+ mutex_lock(&dmabuf->lock);
+ if (dmabuf->name)
+ ret = strlcpy(name, dmabuf->name, DMA_BUF_NAME_LEN);
+ mutex_unlock(&dmabuf->lock);
+
+ return dynamic_dname(dentry, buffer, buflen, "/%s:%s",
+ dentry->d_name.name, ret > 0 ? name : "");
+}
+
+static const struct dentry_operations dma_buf_dentry_ops = {
+ .d_dname = dmabuffs_dname,
+};
+
+static struct vfsmount *dma_buf_mnt;
+
- return mount_pseudo(fs_type, "dmabuf:", NULL, &dma_buf_dentry_ops,
- DMA_BUF_MAGIC);
- .mount = dma_buf_fs_mount,
+static int dma_buf_fs_init_context(struct fs_context *fc)
+{
+ struct pseudo_fs_context *ctx;
+
+ ctx = init_pseudo(fc, DMA_BUF_MAGIC);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->dops = &dma_buf_dentry_ops;
+ return 0;
+}
+
+static struct file_system_type dma_buf_fs_type = {
+ .name = "dmabuf",
+ .init_fs_context = dma_buf_fs_init_context,
+ .kill_sb = kill_anon_super,
+};
+
static int dma_buf_release(struct inode *inode, struct file *file)
{
struct dma_buf *dmabuf;
dmabuf = file->private_data;
+ /* check if buffer supports mmap */
+ if (!dmabuf->ops->mmap)
+ return -EINVAL;
+
/* check for overflowing the buffer's size */
if (vma->vm_pgoff + vma_pages(vma) >
dmabuf->size >> PAGE_SHIFT)
return events;
}
+/**
+ * dma_buf_set_name - Set a name to a specific dma_buf to track the usage.
+ * The name of the dma-buf buffer can only be set when the dma-buf is not
+ * attached to any devices. It could theoretically support changing the
+ * name of the dma-buf if the same piece of memory is used for multiple
+ * purposes between different devices.
+ *
+ * @dmabuf: [in] dmabuf buffer that will be renamed.
+ * @buf: [in] A piece of userspace memory that contains the name of
+ * the dma-buf.
+ *
+ * Returns 0 on success. If the dma-buf buffer is already attached to
+ * devices, return -EBUSY.
+ *
+ */
+static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf)
+{
+ char *name = strndup_user(buf, DMA_BUF_NAME_LEN);
+ long ret = 0;
+
+ if (IS_ERR(name))
+ return PTR_ERR(name);
+
+ mutex_lock(&dmabuf->lock);
+ if (!list_empty(&dmabuf->attachments)) {
+ ret = -EBUSY;
+ kfree(name);
+ goto out_unlock;
+ }
+ kfree(dmabuf->name);
+ dmabuf->name = name;
+
+out_unlock:
+ mutex_unlock(&dmabuf->lock);
+ return ret;
+}
+
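For context, a hedged userspace sketch of how the new ioctl might be driven: label a dma-buf fd obtained from some exporter so the name shows up in fdinfo and the debugfs listing (the fd and the name string are placeholders):

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/dma-buf.h>

static int label_dmabuf(int dmabuf_fd)
{
	/* Fails with EBUSY once the buffer already has attachments. */
	if (ioctl(dmabuf_fd, DMA_BUF_SET_NAME, "camera-preview") < 0) {
		perror("DMA_BUF_SET_NAME");
		return -1;
	}
	return 0;
}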
static long dma_buf_ioctl(struct file *file,
unsigned int cmd, unsigned long arg)
{
ret = dma_buf_begin_cpu_access(dmabuf, direction);
return ret;
+
+ case DMA_BUF_SET_NAME:
+ return dma_buf_set_name(dmabuf, (const char __user *)arg);
+
default:
return -ENOTTY;
}
}
+static void dma_buf_show_fdinfo(struct seq_file *m, struct file *file)
+{
+ struct dma_buf *dmabuf = file->private_data;
+
+ seq_printf(m, "size:\t%zu\n", dmabuf->size);
+ /* Don't count the temporary reference taken inside procfs seq_show */
+ seq_printf(m, "count:\t%ld\n", file_count(dmabuf->file) - 1);
+ seq_printf(m, "exp_name:\t%s\n", dmabuf->exp_name);
+ mutex_lock(&dmabuf->lock);
+ if (dmabuf->name)
+ seq_printf(m, "name:\t%s\n", dmabuf->name);
+ mutex_unlock(&dmabuf->lock);
+}
+
static const struct file_operations dma_buf_fops = {
.release = dma_buf_release,
.mmap = dma_buf_mmap_internal,
#ifdef CONFIG_COMPAT
.compat_ioctl = dma_buf_ioctl,
#endif
+ .show_fdinfo = dma_buf_show_fdinfo,
};
/*
return file->f_op == &dma_buf_fops;
}
+static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
+{
+ struct file *file;
+ struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb);
+
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
+ inode->i_size = dmabuf->size;
+ inode_set_bytes(inode, dmabuf->size);
+
+ file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf",
+ flags, &dma_buf_fops);
+ if (IS_ERR(file))
+ goto err_alloc_file;
+ file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
+ file->private_data = dmabuf;
+ file->f_path.dentry->d_fsdata = dmabuf;
+
+ return file;
+
+err_alloc_file:
+ iput(inode);
+ return file;
+}
+
/**
* DOC: dma buf device access
*
|| !exp_info->ops
|| !exp_info->ops->map_dma_buf
|| !exp_info->ops->unmap_dma_buf
- || !exp_info->ops->release
- || !exp_info->ops->mmap)) {
+ || !exp_info->ops->release)) {
return ERR_PTR(-EINVAL);
}
}
dmabuf->resv = resv;
- file = anon_inode_getfile("dmabuf", &dma_buf_fops, dmabuf,
- exp_info->flags);
+ file = dma_buf_getfile(dmabuf, exp_info->flags);
if (IS_ERR(file)) {
ret = PTR_ERR(file);
goto err_dmabuf;
list_add(&attach->node, &dmabuf->attachments);
mutex_unlock(&dmabuf->lock);
+
return attach;
err_attach:
if (WARN_ON(!dmabuf || !attach))
return;
+ if (attach->sgt)
+ dmabuf->ops->unmap_dma_buf(attach, attach->sgt, attach->dir);
+
mutex_lock(&dmabuf->lock);
list_del(&attach->node);
if (dmabuf->ops->detach)
if (WARN_ON(!attach || !attach->dmabuf))
return ERR_PTR(-EINVAL);
+ if (attach->sgt) {
+ /*
+ * Two mappings with different directions for the same
+ * attachment are not allowed.
+ */
+ if (attach->dir != direction &&
+ attach->dir != DMA_BIDIRECTIONAL)
+ return ERR_PTR(-EBUSY);
+
+ return attach->sgt;
+ }
+
sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
if (!sg_table)
sg_table = ERR_PTR(-ENOMEM);
+ if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) {
+ attach->sgt = sg_table;
+ attach->dir = direction;
+ }
+
return sg_table;
}
EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
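Exporters opt in to this sg_table caching through the cache_sgt_mapping flag in their dma_buf_ops; a hedged sketch with placeholder callbacks (only map/unmap/release are mandatory now that mmap is optional):

static const struct dma_buf_ops example_exporter_ops = {
	.cache_sgt_mapping = true,	/* reuse attach->sgt across map calls */
	.map_dma_buf = example_map_dma_buf,
	.unmap_dma_buf = example_unmap_dma_buf,
	.release = example_release,
};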
if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
return;
- attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
- direction);
+ if (attach->sgt == sg_table)
+ return;
+
+ attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction);
}
EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
if (WARN_ON(!dmabuf || !vma))
return -EINVAL;
+ /* check if buffer supports mmap */
+ if (!dmabuf->ops->mmap)
+ return -EINVAL;
+
/* check for offset overflow */
if (pgoff + vma_pages(vma) < pgoff)
return -EOVERFLOW;
return ret;
seq_puts(s, "\nDma-buf Objects:\n");
- seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\n",
- "size", "flags", "mode", "count");
+ seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\t%-8s\n",
+ "size", "flags", "mode", "count", "ino");
list_for_each_entry(buf_obj, &db_list.head, list_node) {
ret = mutex_lock_interruptible(&buf_obj->lock);
continue;
}
- seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\n",
+ seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08lu\t%s\n",
buf_obj->size,
buf_obj->file->f_flags, buf_obj->file->f_mode,
file_count(buf_obj->file),
- buf_obj->exp_name);
+ buf_obj->exp_name,
+ file_inode(buf_obj->file)->i_ino,
+ buf_obj->name ?: "");
robj = buf_obj->resv;
while (true) {
fence->ops->get_driver_name(fence),
fence->ops->get_timeline_name(fence),
dma_fence_is_signaled(fence) ? "" : "un");
+ dma_fence_put(fence);
}
rcu_read_unlock();
static int __init dma_buf_init(void)
{
+ dma_buf_mnt = kern_mount(&dma_buf_fs_type);
+ if (IS_ERR(dma_buf_mnt))
+ return PTR_ERR(dma_buf_mnt);
+
mutex_init(&db_list.lock);
INIT_LIST_HEAD(&db_list.head);
dma_buf_init_debugfs();
static void __exit dma_buf_deinit(void)
{
dma_buf_uninit_debugfs();
+ kern_unmount(dma_buf_mnt);
}
__exitcall(dma_buf_deinit);
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/mount.h>
+ #include <linux/pseudo_fs.h>
#include <linux/slab.h>
#include <linux/srcu.h>
#include <drm/drm_client.h>
+#include <drm/drm_color_mgmt.h>
#include <drm/drm_drv.h>
-#include <drm/drmP.h>
+#include <drm/drm_file.h>
+#include <drm/drm_mode_object.h>
+#include <drm/drm_print.h>
#include "drm_crtc_internal.h"
-#include "drm_legacy.h"
#include "drm_internal.h"
+#include "drm_legacy.h"
/*
* drm_debug: Enable debug output.
- * Bitmask of DRM_UT_x. See include/drm/drmP.h for details.
+ * Bitmask of DRM_UT_x. See include/drm/drm_print.h for details.
*/
unsigned int drm_debug = 0;
EXPORT_SYMBOL(drm_debug);
static int drm_fs_cnt;
static struct vfsmount *drm_fs_mnt;
- static const struct dentry_operations drm_fs_dops = {
- .d_dname = simple_dname,
- };
-
- static const struct super_operations drm_fs_sops = {
- .statfs = simple_statfs,
- };
-
- static struct dentry *drm_fs_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
+ static int drm_fs_init_fs_context(struct fs_context *fc)
{
- return mount_pseudo(fs_type,
- "drm:",
- &drm_fs_sops,
- &drm_fs_dops,
- 0x010203ff);
+ return init_pseudo(fc, 0x010203ff) ? 0 : -ENOMEM;
}
static struct file_system_type drm_fs_type = {
.name = "drm",
.owner = THIS_MODULE,
- .mount = drm_fs_mount,
+ .init_fs_context = drm_fs_init_fs_context,
.kill_sb = kill_anon_super,
};
}
drm_debugfs_root = debugfs_create_dir("dri", NULL);
- if (!drm_debugfs_root) {
- ret = -ENOMEM;
- DRM_ERROR("Cannot create debugfs-root: %d\n", ret);
- goto error;
- }
ret = register_chrdev(DRM_MAJOR, "drm", &drm_stub_fops);
if (ret < 0)
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2014 IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/pci.h>
#include <misc/cxl.h>
#include <linux/module.h>
#include <linux/mount.h>
+ #include <linux/pseudo_fs.h>
#include <linux/sched/mm.h>
#include <linux/mmu_context.h>
static int cxl_fs_cnt;
static struct vfsmount *cxl_vfs_mount;
- static const struct dentry_operations cxl_fs_dops = {
- .d_dname = simple_dname,
- };
-
- static struct dentry *cxl_fs_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
+ static int cxl_fs_init_fs_context(struct fs_context *fc)
{
- return mount_pseudo(fs_type, "cxl:", NULL, &cxl_fs_dops,
- CXL_PSEUDO_FS_MAGIC);
+ return init_pseudo(fc, CXL_PSEUDO_FS_MAGIC) ? 0 : -ENOMEM;
}
static struct file_system_type cxl_fs_type = {
.name = "cxl",
.owner = THIS_MODULE,
- .mount = cxl_fs_mount,
+ .init_fs_context = cxl_fs_init_fs_context,
.kill_sb = kill_anon_super,
};
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* IBM ASM Service Processor Device Driver
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2004
*
* Author: Max Asböck <amax@us.ibm.com>
- *
*/
/*
*/
#include <linux/fs.h>
+ #include <linux/fs_context.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode);
static void ibmasmfs_create_files (struct super_block *sb);
- static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent);
+ static int ibmasmfs_fill_super(struct super_block *sb, struct fs_context *fc);
+ static int ibmasmfs_get_tree(struct fs_context *fc)
+ {
+ return get_tree_single(fc, ibmasmfs_fill_super);
+ }
+
+ static const struct fs_context_operations ibmasmfs_context_ops = {
+ .get_tree = ibmasmfs_get_tree,
+ };
- static struct dentry *ibmasmfs_mount(struct file_system_type *fst,
- int flags, const char *name, void *data)
+ static int ibmasmfs_init_fs_context(struct fs_context *fc)
{
- return mount_single(fst, flags, data, ibmasmfs_fill_super);
+ fc->ops = &ibmasmfs_context_ops;
+ return 0;
}
static const struct super_operations ibmasmfs_s_ops = {
static struct file_system_type ibmasmfs_type = {
.owner = THIS_MODULE,
.name = "ibmasmfs",
- .mount = ibmasmfs_mount,
+ .init_fs_context = ibmasmfs_init_fs_context,
.kill_sb = kill_litter_super,
};
MODULE_ALIAS_FS("ibmasmfs");
- static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent)
+ static int ibmasmfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *root;
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/mount.h>
+#include <linux/pseudo_fs.h>
+#include <linux/balloon_compaction.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");
-/*
- * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't allow wait
- * (__GFP_RECLAIM) for huge page allocations. Use __GFP_NOWARN, to suppress page
- * allocation failure warnings. Disallow access to emergency low-memory pools.
- */
-#define VMW_HUGE_PAGE_ALLOC_FLAGS (__GFP_HIGHMEM|__GFP_NOWARN| \
- __GFP_NOMEMALLOC)
+static bool __read_mostly vmwballoon_shrinker_enable;
+module_param(vmwballoon_shrinker_enable, bool, 0444);
+MODULE_PARM_DESC(vmwballoon_shrinker_enable,
+ "Enable non-cooperative out-of-memory protection. Disabled by default as it may degrade performance.");
-/*
- * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We allow lightweight
- * reclamation (__GFP_NORETRY). Use __GFP_NOWARN, to suppress page allocation
- * failure warnings. Disallow access to emergency low-memory pools.
- */
-#define VMW_PAGE_ALLOC_FLAGS (__GFP_HIGHMEM|__GFP_NOWARN| \
- __GFP_NOMEMALLOC|__GFP_NORETRY)
+/* Delay in seconds after shrink before inflation. */
+#define VMBALLOON_SHRINK_DELAY (5)
/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED 16
+/* Magic number for the balloon mount-point */
+#define BALLOON_VMW_MAGIC 0x0ba11007
+
/*
* Hypervisor communication port definitions.
*/
VMW_BALLOON_STAT_TIMER,
VMW_BALLOON_STAT_DOORBELL,
VMW_BALLOON_STAT_RESET,
- VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_RESET
+ VMW_BALLOON_STAT_SHRINK,
+ VMW_BALLOON_STAT_SHRINK_FREE,
+ VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_SHRINK_FREE
};
#define VMW_BALLOON_STAT_NUM (VMW_BALLOON_STAT_LAST + 1)
-
static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);
static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled);
struct vmballoon_ctl {
struct list_head pages;
struct list_head refused_pages;
+ struct list_head prealloc_pages;
unsigned int n_refused_pages;
unsigned int n_pages;
enum vmballoon_page_size_type page_size;
enum vmballoon_op op;
};
-struct vmballoon_page_size {
- /* list of reserved physical pages */
- struct list_head pages;
-};
-
/**
* struct vmballoon_batch_entry - a batch entry for lock or unlock.
*
} __packed;
struct vmballoon {
- struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];
-
/**
* @max_page_size: maximum supported page size for ballooning.
*
*/
struct page *page;
+ /**
+ * @shrink_timeout: timeout until the next inflation.
+ *
+ * After a shrink event, indicates the time in jiffies after which
+ * inflation is allowed again. Can be written concurrently with reads,
+ * so must use READ_ONCE/WRITE_ONCE when accessing.
+ */
+ unsigned long shrink_timeout;
+
/* statistics */
struct vmballoon_stats *stats;
struct dentry *dbg_entry;
#endif
+ /**
+ * @b_dev_info: balloon device information descriptor.
+ */
+ struct balloon_dev_info b_dev_info;
+
struct delayed_work dwork;
+ /**
+ * @huge_pages - list of the inflated 2MB pages.
+ *
+ * Protected by @b_dev_info.pages_lock .
+ */
+ struct list_head huge_pages;
+
/**
* @vmci_doorbell.
*
* Lock ordering: @conf_sem -> @comm_lock .
*/
spinlock_t comm_lock;
+
+ /**
+ * @shrinker: shrinker interface that is used to avoid over-inflation.
+ */
+ struct shrinker shrinker;
+
+ /**
+ * @shrinker_registered: whether the shrinker was registered.
+ *
+ * The shrinker interface does not gracefully handle the removal of a
+ * shrinker that was never registered. This indication keeps the
+ * unregistration path simple.
+ */
+ bool shrinker_registered;
};
static struct vmballoon balloon;
unsigned int i;
for (i = 0; i < req_n_pages; i++) {
- if (ctl->page_size == VMW_BALLOON_2M_PAGE)
- page = alloc_pages(VMW_HUGE_PAGE_ALLOC_FLAGS,
- VMW_BALLOON_2M_ORDER);
- else
- page = alloc_page(VMW_PAGE_ALLOC_FLAGS);
-
- /* Update statistics */
- vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC,
- ctl->page_size);
+ /*
+ * First check if we happen to have pages that were allocated
+ * before. This happens when a 2MB page was rejected during inflation
+ * by the hypervisor and then split into 4KB pages.
+ */
+ if (!list_empty(&ctl->prealloc_pages)) {
+ page = list_first_entry(&ctl->prealloc_pages,
+ struct page, lru);
+ list_del(&page->lru);
+ } else {
+ if (ctl->page_size == VMW_BALLOON_2M_PAGE)
+ page = alloc_pages(__GFP_HIGHMEM|__GFP_NOWARN|
+ __GFP_NOMEMALLOC, VMW_BALLOON_2M_ORDER);
+ else
+ page = balloon_page_alloc();
+
+ vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC,
+ ctl->page_size);
+ }
if (page) {
vmballoon_mark_page_offline(page, ctl->page_size);
__free_pages(page, vmballoon_page_order(page_size));
}
- *n_pages = 0;
+ if (n_pages)
+ *n_pages = 0;
}
size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE))
return 0;
+ /* If an out-of-memory recently occurred, inflation is disallowed. */
+ if (target > size && time_before(jiffies, READ_ONCE(b->shrink_timeout)))
+ return 0;
+
return target - size;
}
unsigned int *n_pages,
enum vmballoon_page_size_type page_size)
{
- struct vmballoon_page_size *page_size_info = &b->page_sizes[page_size];
+ unsigned long flags;
+
+ if (page_size == VMW_BALLOON_4K_PAGE) {
+ balloon_page_list_enqueue(&b->b_dev_info, pages);
+ } else {
+ /*
+ * Keep the huge pages in a local list which is not available
+ * for the balloon compaction mechanism.
+ */
+ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
+ list_splice_init(pages, &b->huge_pages);
+ __count_vm_events(BALLOON_INFLATE, *n_pages *
+ vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
+ spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
+ }
- list_splice_init(pages, &page_size_info->pages);
*n_pages = 0;
}
enum vmballoon_page_size_type page_size,
unsigned int n_req_pages)
{
- struct vmballoon_page_size *page_size_info = &b->page_sizes[page_size];
struct page *page, *tmp;
unsigned int i = 0;
+ unsigned long flags;
- list_for_each_entry_safe(page, tmp, &page_size_info->pages, lru) {
+ /* In the case of 4k pages, use the compaction infrastructure */
+ if (page_size == VMW_BALLOON_4K_PAGE) {
+ *n_pages = balloon_page_list_dequeue(&b->b_dev_info, pages,
+ n_req_pages);
+ return;
+ }
+
+ /* 2MB pages */
+ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
+ list_for_each_entry_safe(page, tmp, &b->huge_pages, lru) {
list_move(&page->lru, pages);
if (++i == n_req_pages)
break;
}
+
+ __count_vm_events(BALLOON_DEFLATE,
+ i * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
+ spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
*n_pages = i;
}
+/**
+ * vmballoon_split_refused_pages() - Split the 2MB refused pages to 4k.
+ *
+ * If inflation of 2MB pages was denied by the hypervisor, it is likely to be
+ * due to one or a few 4KB pages. These 2MB pages may keep being allocated and
+ * then being refused. To prevent this case, this function splits the refused
+ * pages into 4KB pages and adds them into @prealloc_pages list.
+ *
+ * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
+ */
+static void vmballoon_split_refused_pages(struct vmballoon_ctl *ctl)
+{
+ struct page *page, *tmp;
+ unsigned int i, order;
+
+ order = vmballoon_page_order(ctl->page_size);
+
+ list_for_each_entry_safe(page, tmp, &ctl->refused_pages, lru) {
+ list_del(&page->lru);
+ split_page(page, order);
+ for (i = 0; i < (1 << order); i++)
+ list_add(&page[i].lru, &ctl->prealloc_pages);
+ }
+ ctl->n_refused_pages = 0;
+}
+
/**
* vmballoon_inflate() - Inflate the balloon towards its target size.
*
struct vmballoon_ctl ctl = {
.pages = LIST_HEAD_INIT(ctl.pages),
.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
+ .prealloc_pages = LIST_HEAD_INIT(ctl.prealloc_pages),
.page_size = b->max_page_size,
.op = VMW_BALLOON_INFLATE
};
break;
/*
- * Ignore errors from locking as we now switch to 4k
- * pages and we might get different errors.
+ * Split the refused pages to 4k. This will also empty
+ * the refused pages list.
*/
- vmballoon_release_refused_pages(b, &ctl);
+ vmballoon_split_refused_pages(&ctl);
ctl.page_size--;
}
*/
if (ctl.n_refused_pages != 0)
vmballoon_release_refused_pages(b, &ctl);
+
+ vmballoon_release_page_list(&ctl.prealloc_pages, NULL, ctl.page_size);
}
/**
}
+/**
+ * vmballoon_shrinker_scan() - deflate the balloon due to memory pressure.
+ * @shrinker: pointer to the balloon shrinker.
+ * @sc: page reclaim information.
+ *
+ * Returns: number of pages that were freed during deflation.
+ */
+static unsigned long vmballoon_shrinker_scan(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ struct vmballoon *b = &balloon;
+ unsigned long deflated_frames;
+
+ pr_debug("%s - size: %llu", __func__, atomic64_read(&b->size));
+
+ vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_SHRINK);
+
+ /*
+ * If the lock is also contended for read, we cannot easily reclaim and
+ * we bail out.
+ */
+ if (!down_read_trylock(&b->conf_sem))
+ return 0;
+
+ deflated_frames = vmballoon_deflate(b, sc->nr_to_scan, true);
+
+ vmballoon_stats_gen_add(b, VMW_BALLOON_STAT_SHRINK_FREE,
+ deflated_frames);
+
+ /*
+ * Delay future inflation for some time to mitigate the situations in
+ * which balloon continuously grows and shrinks. Use WRITE_ONCE() since
+ * the access is asynchronous.
+ */
+ WRITE_ONCE(b->shrink_timeout, jiffies + HZ * VMBALLOON_SHRINK_DELAY);
+
+ up_read(&b->conf_sem);
+
+ return deflated_frames;
+}
+
+/**
+ * vmballoon_shrinker_count() - return the number of ballooned pages.
+ * @shrinker: pointer to the balloon shrinker.
+ * @sc: page reclaim information.
+ *
+ * Returns: number of 4k pages that are allocated for the balloon and can
+ * therefore be reclaimed under pressure.
+ */
+static unsigned long vmballoon_shrinker_count(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ struct vmballoon *b = &balloon;
+
+ return atomic64_read(&b->size);
+}
+
+static void vmballoon_unregister_shrinker(struct vmballoon *b)
+{
+ if (b->shrinker_registered)
+ unregister_shrinker(&b->shrinker);
+ b->shrinker_registered = false;
+}
+
+static int vmballoon_register_shrinker(struct vmballoon *b)
+{
+ int r;
+
+ /* Do nothing if the shrinker is not enabled */
+ if (!vmwballoon_shrinker_enable)
+ return 0;
+
+ b->shrinker.scan_objects = vmballoon_shrinker_scan;
+ b->shrinker.count_objects = vmballoon_shrinker_count;
+ b->shrinker.seeks = DEFAULT_SEEKS;
+
+ r = register_shrinker(&b->shrinker);
+
+ if (r == 0)
+ b->shrinker_registered = true;
+
+ return r;
+}
+
/*
* DEBUGFS Interface
*/
[VMW_BALLOON_STAT_TIMER] = "timer",
[VMW_BALLOON_STAT_DOORBELL] = "doorbell",
[VMW_BALLOON_STAT_RESET] = "reset",
+ [VMW_BALLOON_STAT_SHRINK] = "shrink",
+ [VMW_BALLOON_STAT_SHRINK_FREE] = "shrinkFree"
};
static int vmballoon_enable_stats(struct vmballoon *b)
DEFINE_SHOW_ATTRIBUTE(vmballoon_debug);
-static int __init vmballoon_debugfs_init(struct vmballoon *b)
+static void __init vmballoon_debugfs_init(struct vmballoon *b)
{
- int error;
-
b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
&vmballoon_debug_fops);
- if (IS_ERR(b->dbg_entry)) {
- error = PTR_ERR(b->dbg_entry);
- pr_err("failed to create debugfs entry, error: %d\n", error);
- return error;
- }
-
- return 0;
}
static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
#else
-static inline int vmballoon_debugfs_init(struct vmballoon *b)
+static inline void vmballoon_debugfs_init(struct vmballoon *b)
{
- return 0;
}
static inline void vmballoon_debugfs_exit(struct vmballoon *b)
#endif /* CONFIG_DEBUG_FS */
- static struct dentry *vmballoon_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data)
+
+#ifdef CONFIG_BALLOON_COMPACTION
+
- static const struct dentry_operations ops = {
- .d_dname = simple_dname,
- };
-
- return mount_pseudo(fs_type, "balloon-vmware:", NULL, &ops,
- BALLOON_VMW_MAGIC);
- .name = "balloon-vmware",
- .mount = vmballoon_mount,
- .kill_sb = kill_anon_super,
+static int vmballoon_init_fs_context(struct fs_context *fc)
+{
+ return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type vmballoon_fs = {
+ .name = "balloon-vmware",
+ .init_fs_context = vmballoon_init_fs_context,
+ .kill_sb = kill_anon_super,
+};
+
+static struct vfsmount *vmballoon_mnt;
+
+/**
+ * vmballoon_migratepage() - migrates a balloon page.
+ * @b_dev_info: balloon device information descriptor.
+ * @newpage: the page to which @page should be migrated.
+ * @page: a ballooned page that should be migrated.
+ * @mode: migration mode, ignored.
+ *
+ * This function is really open-coded, but that is according to the interface
+ * that balloon_compaction provides.
+ *
+ * Return: zero on success, -EAGAIN when migration cannot be performed
+ * momentarily, and -EBUSY if migration failed and should be retried
+ * with that specific page.
+ */
+static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
+ struct page *newpage, struct page *page,
+ enum migrate_mode mode)
+{
+ unsigned long status, flags;
+ struct vmballoon *b;
+ int ret;
+
+ b = container_of(b_dev_info, struct vmballoon, b_dev_info);
+
+ /*
+ * If the semaphore is taken, there is ongoing configuration change
+ * (i.e., balloon reset), so try again.
+ */
+ if (!down_read_trylock(&b->conf_sem))
+ return -EAGAIN;
+
+ spin_lock(&b->comm_lock);
+ /*
+ * We must start by deflating and not inflating, as otherwise the
+ * hypervisor may tell us that it has enough memory and the new page is
+ * not needed. Since the old page is isolated, we cannot use the list
+ * interface to unlock it, as the LRU field is used for isolation.
+ * Instead, we use the native interface directly.
+ */
+ vmballoon_add_page(b, 0, page);
+ status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
+ VMW_BALLOON_DEFLATE);
+
+ if (status == VMW_BALLOON_SUCCESS)
+ status = vmballoon_status_page(b, 0, &page);
+
+ /*
+ * If a failure happened, let the migration mechanism know that it
+ * should not retry.
+ */
+ if (status != VMW_BALLOON_SUCCESS) {
+ spin_unlock(&b->comm_lock);
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
+ /*
+ * The page is isolated, so it is safe to delete it without holding
+ * @pages_lock . We keep holding @comm_lock since we will need it in a
+ * second.
+ */
+ balloon_page_delete(page);
+
+ put_page(page);
+
+ /* Inflate */
+ vmballoon_add_page(b, 0, newpage);
+ status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
+ VMW_BALLOON_INFLATE);
+
+ if (status == VMW_BALLOON_SUCCESS)
+ status = vmballoon_status_page(b, 0, &newpage);
+
+ spin_unlock(&b->comm_lock);
+
+ if (status != VMW_BALLOON_SUCCESS) {
+ /*
+ * A failure happened. While we can deflate the page we just
+ * inflated, this deflation can also encounter an error. Instead
+ * we will decrease the size of the balloon to reflect the
+ * change and report failure.
+ */
+ atomic64_dec(&b->size);
+ ret = -EBUSY;
+ } else {
+ /*
+ * Success. Take a reference for the page, and we will add it to
+ * the list after acquiring the lock.
+ */
+ get_page(newpage);
+ ret = MIGRATEPAGE_SUCCESS;
+ }
+
+ /* Update the balloon list under the @pages_lock */
+ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
+
+ /*
+ * On inflation success, we already took a reference for the @newpage.
+ * If we succeed just insert it to the list and update the statistics
+ * under the lock.
+ */
+ if (ret == MIGRATEPAGE_SUCCESS) {
+ balloon_page_insert(&b->b_dev_info, newpage);
+ __count_vm_event(BALLOON_MIGRATE);
+ }
+
+ /*
+ * We deflated successfully, so regardless to the inflation success, we
+ * need to reduce the number of isolated_pages.
+ */
+ b->b_dev_info.isolated_pages--;
+ spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
+
+out_unlock:
+ up_read(&b->conf_sem);
+ return ret;
+}
+
+/**
+ * vmballoon_compaction_deinit() - removes compaction related data.
+ *
+ * @b: pointer to the balloon.
+ */
+static void vmballoon_compaction_deinit(struct vmballoon *b)
+{
+ if (!IS_ERR(b->b_dev_info.inode))
+ iput(b->b_dev_info.inode);
+
+ b->b_dev_info.inode = NULL;
+ kern_unmount(vmballoon_mnt);
+ vmballoon_mnt = NULL;
+}
+
+/**
+ * vmballoon_compaction_init() - initializes compaction for the balloon.
+ *
+ * @b: pointer to the balloon.
+ *
+ * If during the initialization a failure occurred, this function does not
+ * perform cleanup. The caller must call vmballoon_compaction_deinit() in this
+ * case.
+ *
+ * Return: zero on success or error code on failure.
+ */
+static __init int vmballoon_compaction_init(struct vmballoon *b)
+{
+ vmballoon_mnt = kern_mount(&vmballoon_fs);
+ if (IS_ERR(vmballoon_mnt))
+ return PTR_ERR(vmballoon_mnt);
+
+ b->b_dev_info.migratepage = vmballoon_migratepage;
+ b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb);
+
+ if (IS_ERR(b->b_dev_info.inode))
+ return PTR_ERR(b->b_dev_info.inode);
+
+ b->b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
+ return 0;
+}
+
+#else /* CONFIG_BALLOON_COMPACTION */
+
+static void vmballoon_compaction_deinit(struct vmballoon *b)
+{
+}
+
+static int vmballoon_compaction_init(struct vmballoon *b)
+{
+ return 0;
+}
+
+#endif /* CONFIG_BALLOON_COMPACTION */
+
static int __init vmballoon_init(void)
{
- enum vmballoon_page_size_type page_size;
int error;
/*
if (x86_hyper_type != X86_HYPER_VMWARE)
return -ENODEV;
- for (page_size = VMW_BALLOON_4K_PAGE;
- page_size <= VMW_BALLOON_LAST_SIZE; page_size++)
- INIT_LIST_HEAD(&balloon.page_sizes[page_size].pages);
-
-
INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
- error = vmballoon_debugfs_init(&balloon);
+ error = vmballoon_register_shrinker(&balloon);
+ if (error)
+ goto fail;
+
+ /*
+ * Initialization of compaction must be done after the call to
+ * balloon_devinfo_init() .
+ */
+ balloon_devinfo_init(&balloon.b_dev_info);
+ error = vmballoon_compaction_init(&balloon);
if (error)
- return error;
+ goto fail;
+ INIT_LIST_HEAD(&balloon.huge_pages);
spin_lock_init(&balloon.comm_lock);
init_rwsem(&balloon.conf_sem);
balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
+ vmballoon_debugfs_init(&balloon);
+
return 0;
+fail:
+ vmballoon_unregister_shrinker(&balloon);
+ vmballoon_compaction_deinit(&balloon);
+ return error;
}
/*
static void __exit vmballoon_exit(void)
{
+ vmballoon_unregister_shrinker(&balloon);
vmballoon_vmci_cleanup(&balloon);
cancel_delayed_work_sync(&balloon.dwork);
*/
vmballoon_send_start(&balloon, 0);
vmballoon_pop(&balloon);
+
+ /* Compaction can be deinitialized only once the balloon has been popped. */
+ vmballoon_compaction_deinit(&balloon);
}
module_exit(vmballoon_exit);
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* CXL Flash Device Driver
*
* Uma Krishnan <ukrishn@linux.vnet.ibm.com>, IBM Corporation
*
* Copyright (C) 2018 IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/file.h>
#include <linux/idr.h>
#include <linux/module.h>
#include <linux/mount.h>
+ #include <linux/pseudo_fs.h>
#include <linux/poll.h>
#include <linux/sched/signal.h>
static int ocxlflash_fs_cnt;
static struct vfsmount *ocxlflash_vfs_mount;
- static const struct dentry_operations ocxlflash_fs_dops = {
- .d_dname = simple_dname,
- };
-
- /*
- * ocxlflash_fs_mount() - mount the pseudo-filesystem
- * @fs_type: File system type.
- * @flags: Flags for the filesystem.
- * @dev_name: Device name associated with the filesystem.
- * @data: Data pointer.
- *
- * Return: pointer to the directory entry structure
- */
- static struct dentry *ocxlflash_fs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data)
+ static int ocxlflash_fs_init_fs_context(struct fs_context *fc)
{
- return mount_pseudo(fs_type, "ocxlflash:", NULL, &ocxlflash_fs_dops,
- OCXLFLASH_FS_MAGIC);
+ return init_pseudo(fc, OCXLFLASH_FS_MAGIC) ? 0 : -ENOMEM;
}
static struct file_system_type ocxlflash_fs_type = {
.name = "ocxlflash",
.owner = THIS_MODULE,
- .mount = ocxlflash_fs_mount,
+ .init_fs_context = ocxlflash_fs_init_fs_context,
.kill_sb = kill_anon_super,
};
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Virtio balloon implementation, inspired by Dor Laor and Marcelo
* Tosatti's implementations.
*
* Copyright 2008 Rusty Russell IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <linux/virtio.h>
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/magic.h>
+ #include <linux/pseudo_fs.h>
/*
* Balloon device works in 4K page units. So each page is pointed to by
return MIGRATEPAGE_SUCCESS;
}
- static struct dentry *balloon_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int balloon_init_fs_context(struct fs_context *fc)
{
- static const struct dentry_operations ops = {
- .d_dname = simple_dname,
- };
-
- return mount_pseudo(fs_type, "balloon-kvm:", NULL, &ops,
- BALLOON_KVM_MAGIC);
+ return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM;
}
static struct file_system_type balloon_fs = {
.name = "balloon-kvm",
- .mount = balloon_mount,
+ .init_fs_context = balloon_init_fs_context,
.kill_sb = kill_anon_super,
};
+// SPDX-License-Identifier: GPL-2.0-only
/*
* xenfs.c - a filesystem for passing info between a domain and
* the hypervisor.
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/fs.h>
+ #include <linux/fs_context.h>
#include <linux/magic.h>
#include <xen/xen.h>
.llseek = default_llseek,
};
- static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
+ static int xenfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
static const struct tree_descr xenfs_files[] = {
[2] = { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR },
xen_initial_domain() ? xenfs_init_files : xenfs_files);
}
- static struct dentry *xenfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data)
+ static int xenfs_get_tree(struct fs_context *fc)
{
- return mount_single(fs_type, flags, data, xenfs_fill_super);
+ return get_tree_single(fc, xenfs_fill_super);
+ }
+
+ static const struct fs_context_operations xenfs_context_ops = {
+ .get_tree = xenfs_get_tree,
+ };
+
+ static int xenfs_init_fs_context(struct fs_context *fc)
+ {
+ fc->ops = &xenfs_context_ops;
+ return 0;
}
static struct file_system_type xenfs_type = {
.owner = THIS_MODULE,
.name = "xenfs",
- .mount = xenfs_mount,
+ .init_fs_context = xenfs_init_fs_context,
.kill_sb = kill_litter_super,
};
MODULE_ALIAS_FS("xenfs");
#include <linux/ramfs.h>
#include <linux/percpu-refcount.h>
#include <linux/mount.h>
+ #include <linux/pseudo_fs.h>
#include <asm/kmap_types.h>
#include <linux/uaccess.h>
return file;
}
- static struct dentry *aio_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int aio_init_fs_context(struct fs_context *fc)
{
- struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, NULL,
- AIO_RING_MAGIC);
-
- if (!IS_ERR(root))
- root->d_sb->s_iflags |= SB_I_NOEXEC;
- return root;
+ if (!init_pseudo(fc, AIO_RING_MAGIC))
+ return -ENOMEM;
+ fc->s_iflags |= SB_I_NOEXEC;
+ return 0;
}
/* aio_setup
{
static struct file_system_type aio_fs = {
.name = "aio",
- .mount = aio_mount,
+ .init_fs_context = aio_init_fs_context,
.kill_sb = kill_anon_super,
};
aio_mnt = kern_mount(&aio_fs);
BUG_ON(PageWriteback(old));
get_page(new);
- rc = migrate_page_move_mapping(mapping, new, old, mode, 1);
+ rc = migrate_page_move_mapping(mapping, new, old, 1);
if (rc != MIGRATEPAGE_SUCCESS) {
put_page(new);
goto out_unlock;
return 0;
}
-static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
- bool vectored, bool compat, struct iov_iter *iter)
+static ssize_t aio_setup_rw(int rw, const struct iocb *iocb,
+ struct iovec **iovec, bool vectored, bool compat,
+ struct iov_iter *iter)
{
void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
size_t len = iocb->aio_nbytes;
return -EINVAL;
ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
- if (ret)
+ if (ret < 0)
return ret;
ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret)
return -EINVAL;
ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
- if (ret)
+ if (ret < 0)
return ret;
ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret) {
const struct __aio_sigset __user *, usig)
{
struct __aio_sigset ksig = { NULL, };
- sigset_t ksigmask, sigsaved;
struct timespec64 ts;
+ bool interrupted;
int ret;
if (timeout && unlikely(get_timespec64(&ts, timeout)))
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
- ret = set_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize);
+ ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize);
if (ret)
return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
- restore_user_sigmask(ksig.sigmask, &sigsaved);
- if (signal_pending(current) && !ret)
+
+ interrupted = signal_pending(current);
+ restore_saved_sigmask_unless(interrupted);
+ if (interrupted && !ret)
ret = -ERESTARTNOHAND;
return ret;
const struct __aio_sigset __user *, usig)
{
struct __aio_sigset ksig = { NULL, };
- sigset_t ksigmask, sigsaved;
struct timespec64 ts;
+ bool interrupted;
int ret;
if (timeout && unlikely(get_old_timespec32(&ts, timeout)))
return -EFAULT;
- ret = set_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize);
+ ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize);
if (ret)
return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
- restore_user_sigmask(ksig.sigmask, &sigsaved);
- if (signal_pending(current) && !ret)
+
+ interrupted = signal_pending(current);
+ restore_saved_sigmask_unless(interrupted);
+ if (interrupted && !ret)
ret = -ERESTARTNOHAND;
return ret;
const struct __compat_aio_sigset __user *, usig)
{
struct __compat_aio_sigset ksig = { NULL, };
- sigset_t ksigmask, sigsaved;
struct timespec64 t;
+ bool interrupted;
int ret;
if (timeout && get_old_timespec32(&t, timeout))
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
- ret = set_compat_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize);
+ ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize);
if (ret)
return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
- restore_user_sigmask(ksig.sigmask, &sigsaved);
- if (signal_pending(current) && !ret)
+
+ interrupted = signal_pending(current);
+ restore_saved_sigmask_unless(interrupted);
+ if (interrupted && !ret)
ret = -ERESTARTNOHAND;
return ret;
const struct __compat_aio_sigset __user *, usig)
{
struct __compat_aio_sigset ksig = { NULL, };
- sigset_t ksigmask, sigsaved;
struct timespec64 t;
+ bool interrupted;
int ret;
if (timeout && get_timespec64(&t, timeout))
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
- ret = set_compat_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize);
+ ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize);
if (ret)
return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
- restore_user_sigmask(ksig.sigmask, &sigsaved);
- if (signal_pending(current) && !ret)
+
+ interrupted = signal_pending(current);
+ restore_saved_sigmask_unless(interrupted);
+ if (interrupted && !ret)
ret = -ERESTARTNOHAND;
return ret;
+// SPDX-License-Identifier: GPL-2.0-only
/*
* fs/anon_inodes.c
*
#include <linux/kernel.h>
#include <linux/magic.h>
#include <linux/anon_inodes.h>
+ #include <linux/pseudo_fs.h>
#include <linux/uaccess.h>
.d_dname = anon_inodefs_dname,
};
- static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int anon_inodefs_init_fs_context(struct fs_context *fc)
{
- return mount_pseudo(fs_type, "anon_inode:", NULL,
- &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
+ struct pseudo_fs_context *ctx = init_pseudo(fc, ANON_INODE_FS_MAGIC);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->dops = &anon_inodefs_dentry_operations;
+ return 0;
}
static struct file_system_type anon_inode_fs_type = {
.name = "anon_inodefs",
- .mount = anon_inodefs_mount,
+ .init_fs_context = anon_inodefs_init_fs_context,
.kill_sb = kill_anon_super,
};
+// SPDX-License-Identifier: GPL-2.0-only
/*
* binfmt_misc.c
*
#include <linux/pagemap.h>
#include <linux/namei.h>
#include <linux/mount.h>
+ #include <linux/fs_context.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
.evict_inode = bm_evict_inode,
};
- static int bm_fill_super(struct super_block *sb, void *data, int silent)
+ static int bm_fill_super(struct super_block *sb, struct fs_context *fc)
{
int err;
static const struct tree_descr bm_files[] = {
return err;
}
- static struct dentry *bm_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int bm_get_tree(struct fs_context *fc)
{
- return mount_single(fs_type, flags, data, bm_fill_super);
+ return get_tree_single(fc, bm_fill_super);
+ }
+
+ static const struct fs_context_operations bm_context_ops = {
+ .get_tree = bm_get_tree,
+ };
+
+ static int bm_init_fs_context(struct fs_context *fc)
+ {
+ fc->ops = &bm_context_ops;
+ return 0;
}
static struct linux_binfmt misc_format = {
static struct file_system_type bm_fs_type = {
.owner = THIS_MODULE,
.name = "binfmt_misc",
- .mount = bm_mount,
+ .init_fs_context = bm_init_fs_context,
.kill_sb = kill_litter_super,
};
MODULE_ALIAS_FS("binfmt_misc");
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/fs/block_dev.c
*
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
+ #include <linux/pseudo_fs.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
{
struct file *file = iocb->ki_filp;
struct block_device *bdev = I_BDEV(bdev_file_inode(file));
- struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs, *bvec;
+ struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
loff_t pos = iocb->ki_pos;
bool should_dirty = false;
struct bio bio;
ssize_t ret;
blk_qc_t qc;
- struct bvec_iter_all iter_all;
if ((pos | iov_iter_alignment(iter)) &
(bdev_logical_block_size(bdev) - 1))
}
__set_current_state(TASK_RUNNING);
- bio_for_each_segment_all(bvec, &bio, iter_all) {
- if (should_dirty && !PageCompound(bvec->bv_page))
- set_page_dirty_lock(bvec->bv_page);
- if (!bio_flagged(&bio, BIO_NO_PAGE_REF))
- put_page(bvec->bv_page);
- }
-
+ bio_release_pages(&bio, should_dirty);
if (unlikely(bio.bi_status))
ret = blk_status_to_errno(bio.bi_status);
if (should_dirty) {
bio_check_pages_dirty(bio);
} else {
- if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
- struct bvec_iter_all iter_all;
- struct bio_vec *bvec;
-
- bio_for_each_segment_all(bvec, bio, iter_all)
- put_page(bvec->bv_page);
- }
+ bio_release_pages(bio, false);
bio_put(bio);
}
}
.evict_inode = bdev_evict_inode,
};
- static struct dentry *bd_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int bd_init_fs_context(struct fs_context *fc)
{
- struct dentry *dent;
- dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
- if (!IS_ERR(dent))
- dent->d_sb->s_iflags |= SB_I_CGROUPWB;
- return dent;
+ struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC);
+ if (!ctx)
+ return -ENOMEM;
+ fc->s_iflags |= SB_I_CGROUPWB;
+ ctx->ops = &bdev_sops;
+ return 0;
}
static struct file_system_type bd_type = {
.name = "bdev",
- .mount = bd_mount,
+ .init_fs_context = bd_init_fs_context,
.kill_sb = kill_anon_super,
};
*/
int revalidate_disk(struct gendisk *disk)
{
- struct block_device *bdev;
int ret = 0;
if (disk->fops->revalidate_disk)
ret = disk->fops->revalidate_disk(disk);
- bdev = bdget_disk(disk, 0);
- if (!bdev)
- return ret;
- mutex_lock(&bdev->bd_mutex);
- check_disk_size_change(disk, bdev, ret == 0);
- bdev->bd_invalidated = 0;
- mutex_unlock(&bdev->bd_mutex);
- bdput(bdev);
+ /*
+ * Hidden disks don't have an associated bdev, so there's no point in
+ * revalidating them.
+ */
+ if (!(disk->flags & GENHD_FL_HIDDEN)) {
+ struct block_device *bdev = bdget_disk(disk, 0);
+
+ if (!bdev)
+ return ret;
+
+ mutex_lock(&bdev->bd_mutex);
+ check_disk_size_change(disk, bdev, ret == 0);
+ bdev->bd_invalidated = 0;
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+ }
return ret;
}
EXPORT_SYMBOL(revalidate_disk);
+// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* mount.c - operations for initializing and mounting configfs.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
* Based on sysfs:
* sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
*
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/mount.h>
+ #include <linux/fs_context.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/slab.h>
.s_iattr = NULL,
};
- static int configfs_fill_super(struct super_block *sb, void *data, int silent)
+ static int configfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
struct dentry *root;
return 0;
}
- static struct dentry *configfs_do_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int configfs_get_tree(struct fs_context *fc)
{
- return mount_single(fs_type, flags, data, configfs_fill_super);
+ return get_tree_single(fc, configfs_fill_super);
+ }
+
+ static const struct fs_context_operations configfs_context_ops = {
+ .get_tree = configfs_get_tree,
+ };
+
+ static int configfs_init_fs_context(struct fs_context *fc)
+ {
+ fc->ops = &configfs_context_ops;
+ return 0;
}
static struct file_system_type configfs_fs_type = {
.owner = THIS_MODULE,
.name = "configfs",
- .mount = configfs_do_mount,
+ .init_fs_context = configfs_init_fs_context,
.kill_sb = kill_litter_super,
};
MODULE_ALIAS_FS("configfs");
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2012 Red Hat, Inc.
* Copyright (C) 2012 Jeremy Kerr <jeremy.kerr@canonical.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/ctype.h>
#include <linux/efi.h>
#include <linux/fs.h>
+ #include <linux/fs_context.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/ucs2_string.h>
.evict_inode = efivarfs_evict_inode,
};
- static struct super_block *efivarfs_sb;
-
/*
* Compare two efivarfs file names.
*
return 0;
}
- static int efivarfs_fill_super(struct super_block *sb, void *data, int silent)
+ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode = NULL;
struct dentry *root;
int err;
- efivarfs_sb = sb;
-
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
return err;
}
- static struct dentry *efivarfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int efivarfs_get_tree(struct fs_context *fc)
+ {
+ return get_tree_single(fc, efivarfs_fill_super);
+ }
+
+ static const struct fs_context_operations efivarfs_context_ops = {
+ .get_tree = efivarfs_get_tree,
+ };
+
+ static int efivarfs_init_fs_context(struct fs_context *fc)
{
- return mount_single(fs_type, flags, data, efivarfs_fill_super);
+ fc->ops = &efivarfs_context_ops;
+ return 0;
}
static void efivarfs_kill_sb(struct super_block *sb)
{
kill_litter_super(sb);
- efivarfs_sb = NULL;
/* Remove all entries and destroy */
__efivar_entry_iter(efivarfs_destroy, &efivarfs_list, NULL, NULL);
static struct file_system_type efivarfs_type = {
.owner = THIS_MODULE,
.name = "efivarfs",
- .mount = efivarfs_mount,
+ .init_fs_context = efivarfs_init_fs_context,
.kill_sb = efivarfs_kill_sb,
};
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Filesystem parameter parser.
*
* Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
*/
#include <linux/export.h>
return invalf(fc, "%s: not usable as path", param->key);
}
+ f->refcnt++; /* filename_lookup() drops our ref. */
ret = filename_lookup(param->dirfd, f, flags, _path, NULL);
if (ret < 0) {
errorf(fc, "%s: Lookup failure for '%s'", param->key, f->name);
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Filesystem access-by-fd.
*
* Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
*/
#include <linux/fs_context.h>
case FSCONFIG_CMD_CREATE:
if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
return -EBUSY;
+ if (!mount_capable(fc))
+ return -EPERM;
fc->phase = FS_CONTEXT_CREATING;
ret = vfs_get_tree(fc);
if (ret)
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/* fs/ internal definitions
*
* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
struct super_block;
struct mount;
struct shrink_control;
struct fs_context;
+ struct user_namespace;
/*
* block_dev.c
extern void guard_bio_eod(int rw, struct bio *bio);
extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block, struct iomap *iomap);
-void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
- struct page *page);
/*
* char_dev.c
extern int reconfigure_super(struct fs_context *);
extern bool trylock_super(struct super_block *sb);
extern struct super_block *user_get_super(dev_t);
+ extern bool mount_capable(struct fs_context *);
/*
* open.c
extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
extern struct dentry *d_alloc_cursor(struct dentry *);
extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
+ extern char *simple_dname(struct dentry *, char *, int);
/*
* read_write.c
+// SPDX-License-Identifier: GPL-2.0-only
/*
* fs/libfs.c
* Library for filesystems writers.
#include <linux/exportfs.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h> /* sync_mapping_buffers */
+ #include <linux/fs_context.h>
+ #include <linux/pseudo_fs.h>
#include <linux/uaccess.h>
.statfs = simple_statfs,
};
- /*
- * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
- * will never be mountable)
- */
- struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name,
- const struct super_operations *ops, const struct xattr_handler **xattr,
- const struct dentry_operations *dops, unsigned long magic)
+ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
{
- struct super_block *s;
- struct dentry *dentry;
+ struct pseudo_fs_context *ctx = fc->fs_private;
struct inode *root;
- struct qstr d_name = QSTR_INIT(name, strlen(name));
-
- s = sget_userns(fs_type, NULL, set_anon_super, SB_KERNMOUNT|SB_NOUSER,
- &init_user_ns, NULL);
- if (IS_ERR(s))
- return ERR_CAST(s);
s->s_maxbytes = MAX_LFS_FILESIZE;
s->s_blocksize = PAGE_SIZE;
s->s_blocksize_bits = PAGE_SHIFT;
- s->s_magic = magic;
- s->s_op = ops ? ops : &simple_super_operations;
- s->s_xattr = xattr;
+ s->s_magic = ctx->magic;
+ s->s_op = ctx->ops ?: &simple_super_operations;
+ s->s_xattr = ctx->xattr;
s->s_time_gran = 1;
root = new_inode(s);
if (!root)
- goto Enomem;
+ return -ENOMEM;
+
/*
* since this is the first inode, make it number 1. New inodes created
* after this must take care not to collide with it (by passing
root->i_ino = 1;
root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
root->i_atime = root->i_mtime = root->i_ctime = current_time(root);
- dentry = __d_alloc(s, &d_name);
- if (!dentry) {
- iput(root);
- goto Enomem;
+ s->s_root = d_make_root(root);
+ if (!s->s_root)
+ return -ENOMEM;
+ s->s_d_op = ctx->dops;
+ return 0;
+ }
+
+ static int pseudo_fs_get_tree(struct fs_context *fc)
+ {
+ return get_tree_nodev(fc, pseudo_fs_fill_super);
+ }
+
+ static void pseudo_fs_free(struct fs_context *fc)
+ {
+ kfree(fc->fs_private);
+ }
+
+ static const struct fs_context_operations pseudo_fs_context_ops = {
+ .free = pseudo_fs_free,
+ .get_tree = pseudo_fs_get_tree,
+ };
+
+ /*
+ * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
+ * will never be mountable)
+ */
+ struct pseudo_fs_context *init_pseudo(struct fs_context *fc,
+ unsigned long magic)
+ {
+ struct pseudo_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct pseudo_fs_context), GFP_KERNEL);
+ if (likely(ctx)) {
+ ctx->magic = magic;
+ fc->fs_private = ctx;
+ fc->ops = &pseudo_fs_context_ops;
+ fc->sb_flags |= SB_NOUSER;
+ fc->global = true;
}
- d_instantiate(dentry, root);
- s->s_root = dentry;
- s->s_d_op = dops;
- s->s_flags |= SB_ACTIVE;
- return dget(s->s_root);
-
- Enomem:
- deactivate_locked_super(s);
- return ERR_PTR(-ENOMEM);
+ return ctx;
}
- EXPORT_SYMBOL(mount_pseudo_xattr);
+ EXPORT_SYMBOL(init_pseudo);
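For reference, a minimal consumer of init_pseudo() looks like the conversions elsewhere in this series; the sketch below uses a hypothetical "examplefs" (name and magic are illustrative, not part of the patch) and only overrides the dentry operations, leaving simple_super_operations in place:

static int examplefs_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = init_pseudo(fc, EXAMPLEFS_MAGIC);

	if (!ctx)
		return -ENOMEM;
	/* ctx->ops, ctx->dops and ctx->xattr may be filled in as needed. */
	ctx->dops = &examplefs_dentry_operations;
	return 0;
}

static struct file_system_type examplefs_type = {
	.name			= "examplefs",
	.init_fs_context	= examplefs_init_fs_context,
	.kill_sb		= kill_anon_super,
};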
int simple_open(struct inode *inode, struct file *file)
{
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/fs/namespace.c
*
* (C) Copyright Al Viro 2000, 2001
- * Released under GPL v2.
*
* Based on code from fs/super.c, copyright Linus Torvalds and others.
* Heavily rewritten.
#include <linux/sched/task.h>
#include <uapi/linux/mount.h>
#include <linux/fs_context.h>
+ #include <linux/shmem_fs.h>
#include "pnode.h"
#include "internal.h"
/* Notice when we are propagating across user namespaces */
if (child->mnt_parent->mnt_ns->user_ns != user_ns)
lock_mnt_tree(child);
+ child->mnt.mnt_flags &= ~MNT_LOCKED;
commit_tree(child);
}
put_mountpoint(smp);
if (!check_mnt(p))
goto out;
- /* The thing moved should be either ours or completely unattached. */
- if (attached && !check_mnt(old))
+ /* The thing moved must be mounted... */
+ if (!is_mounted(&old->mnt))
goto out;
- if (!attached && !(ns && is_anon_ns(ns)))
+ /* ... and either ours or the root of an anon namespace */
+ if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
goto out;
if (old->mnt.mnt_flags & MNT_LOCKED)
err = vfs_parse_fs_string(fc, "source", name, strlen(name));
if (!err)
err = parse_monolithic_mount_data(fc, data);
+ if (!err && !mount_capable(fc))
+ err = -EPERM;
if (!err)
err = vfs_get_tree(fc);
if (!err)
}
EXPORT_SYMBOL(mount_subtree);
- int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type,
- unsigned long flags, void __user *data)
+ int ksys_mount(const char __user *dev_name, const char __user *dir_name,
+ const char __user *type, unsigned long flags, void __user *data)
{
int ret;
char *kernel_type;
ns->root = mnt;
ns->mounts = 1;
list_add(&mnt->mnt_list, &ns->list);
+ mntget(newmount.mnt);
/* Attach to an apparent O_PATH fd with a note that we need to unmount
* it, not just simply put it.
struct mount *m;
struct mnt_namespace *ns;
struct path root;
- struct file_system_type *type;
- type = get_fs_type("rootfs");
- if (!type)
- panic("Can't find rootfs type");
- mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
- put_filesystem(type);
+ mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
if (IS_ERR(mnt))
panic("Can't create rootfs");
fs_kobj = kobject_create_and_add("fs", NULL);
if (!fs_kobj)
printk(KERN_WARNING "%s: kobj create error\n", __func__);
+ shmem_init();
init_rootfs();
init_mount_tree();
}
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Syscall interface to knfsd.
*
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/ctype.h>
+ #include <linux/fs_context.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/lockd/lockd.h>
#include <linux/sunrpc/gss_krb5_enctypes.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/module.h>
+#include <linux/fsnotify.h>
#include "idmap.h"
#include "nfsd.h"
NFSD_RecoveryDir,
NFSD_V4EndGrace,
#endif
+ NFSD_MaxReserved
};
/*
* populating the filesystem.
*/
- static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
+/* Basically copying rpc_get_inode. */
+static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
+{
+ struct inode *inode = new_inode(sb);
+ if (!inode)
+ return NULL;
+ /* Following advice from simple_fill_super documentation: */
+ inode->i_ino = iunique(sb, NFSD_MaxReserved);
+ inode->i_mode = mode;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ switch (mode & S_IFMT) {
+ case S_IFDIR:
+ inode->i_fop = &simple_dir_operations;
+ inode->i_op = &simple_dir_inode_operations;
+ inc_nlink(inode);
+ default:
+ break;
+ }
+ return inode;
+}
+
+static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct inode *inode;
+
+ inode = nfsd_get_inode(dir->i_sb, mode);
+ if (!inode)
+ return -ENOMEM;
+ d_add(dentry, inode);
+ inc_nlink(dir);
+ fsnotify_mkdir(dir, dentry);
+ return 0;
+}
+
+static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *ncl, char *name)
+{
+ struct inode *dir = parent->d_inode;
+ struct dentry *dentry;
+ int ret = -ENOMEM;
+
+ inode_lock(dir);
+ dentry = d_alloc_name(parent, name);
+ if (!dentry)
+ goto out_err;
+ ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600);
+ if (ret)
+ goto out_err;
+ if (ncl) {
+ d_inode(dentry)->i_private = ncl;
+ kref_get(&ncl->cl_ref);
+ }
+out:
+ inode_unlock(dir);
+ return dentry;
+out_err:
+ dentry = ERR_PTR(ret);
+ goto out;
+}
+
+static void clear_ncl(struct inode *inode)
+{
+ struct nfsdfs_client *ncl = inode->i_private;
+
+ inode->i_private = NULL;
+ synchronize_rcu();
+ kref_put(&ncl->cl_ref, ncl->cl_release);
+}
+
+
+static struct nfsdfs_client *__get_nfsdfs_client(struct inode *inode)
+{
+ struct nfsdfs_client *nc = inode->i_private;
+
+ if (nc)
+ kref_get(&nc->cl_ref);
+ return nc;
+}
+
+struct nfsdfs_client *get_nfsdfs_client(struct inode *inode)
+{
+ struct nfsdfs_client *nc;
+
+ rcu_read_lock();
+ nc = __get_nfsdfs_client(inode);
+ rcu_read_unlock();
+ return nc;
+}
+/* from __rpc_unlink */
+static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry)
+{
+ int ret;
+
+ clear_ncl(d_inode(dentry));
+ dget(dentry);
+ ret = simple_unlink(dir, dentry);
+ d_delete(dentry);
+ dput(dentry);
+ WARN_ON_ONCE(ret);
+}
+
+static void nfsdfs_remove_files(struct dentry *root)
+{
+ struct dentry *dentry, *tmp;
+
+ list_for_each_entry_safe(dentry, tmp, &root->d_subdirs, d_child) {
+ if (!simple_positive(dentry)) {
+ WARN_ON_ONCE(1); /* I think this can't happen? */
+ continue;
+ }
+ nfsdfs_remove_file(d_inode(root), dentry);
+ }
+}
+
+/* XXX: cut'n'paste from simple_fill_super; figure out if we could share
+ * code instead. */
+static int nfsdfs_create_files(struct dentry *root,
+ const struct tree_descr *files)
+{
+ struct inode *dir = d_inode(root);
+ struct inode *inode;
+ struct dentry *dentry;
+ int i;
+
+ inode_lock(dir);
+ for (i = 0; files->name && files->name[0]; i++, files++) {
+ if (!files->name)
+ continue;
+ dentry = d_alloc_name(root, files->name);
+ if (!dentry)
+ goto out;
+ inode = nfsd_get_inode(d_inode(root)->i_sb,
+ S_IFREG | files->mode);
+ if (!inode) {
+ dput(dentry);
+ goto out;
+ }
+ inode->i_fop = files->ops;
+ inode->i_private = __get_nfsdfs_client(dir);
+ d_add(dentry, inode);
+ fsnotify_create(dir, dentry);
+ }
+ inode_unlock(dir);
+ return 0;
+out:
+ nfsdfs_remove_files(root);
+ inode_unlock(dir);
+ return -ENOMEM;
+}
+
+/* on success, returns the dentry of a new directory named with a number unique to that client. */
+struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
+ struct nfsdfs_client *ncl, u32 id,
+ const struct tree_descr *files)
+{
+ struct dentry *dentry;
+ char name[11];
+ int ret;
+
+ sprintf(name, "%u", id);
+
+ dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name);
+ if (IS_ERR(dentry)) /* XXX: tossing errors? */
+ return NULL;
+ ret = nfsdfs_create_files(dentry, files);
+ if (ret) {
+ nfsd_client_rmdir(dentry);
+ return NULL;
+ }
+ return dentry;
+}
+
+/* Taken from __rpc_rmdir: */
+void nfsd_client_rmdir(struct dentry *dentry)
+{
+ struct inode *dir = d_inode(dentry->d_parent);
+ struct inode *inode = d_inode(dentry);
+ int ret;
+
+ inode_lock(dir);
+ nfsdfs_remove_files(dentry);
+ clear_ncl(inode);
+ dget(dentry);
+ ret = simple_rmdir(dir, dentry);
+ WARN_ON_ONCE(ret);
+ d_delete(dentry);
+ inode_unlock(dir);
+}
+
+ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
{
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ struct dentry *dentry;
+ int ret;
+
static const struct tree_descr nfsd_files[] = {
[NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO},
[NFSD_Export_features] = {"export_features",
#endif
/* last one */ {""}
};
- get_net(sb->s_fs_info);
+
- return simple_fill_super(sb, 0x6e667364, nfsd_files);
+ ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
+ if (ret)
+ return ret;
+ dentry = nfsd_mkdir(sb->s_root, NULL, "clients");
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ nn->nfsd_client_dir = dentry;
+ return 0;
+ }
+ static int nfsd_fs_get_tree(struct fs_context *fc)
+ {
+ fc->s_fs_info = get_net(fc->net_ns);
+ return vfs_get_super(fc, vfs_get_keyed_super, nfsd_fill_super);
}
- static struct dentry *nfsd_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static void nfsd_fs_free_fc(struct fs_context *fc)
{
- struct net *net = current->nsproxy->net_ns;
- return mount_ns(fs_type, flags, data, net, net->user_ns, nfsd_fill_super);
+ if (fc->s_fs_info)
+ put_net(fc->s_fs_info);
+ }
+
+ static const struct fs_context_operations nfsd_fs_context_ops = {
+ .free = nfsd_fs_free_fc,
+ .get_tree = nfsd_fs_get_tree,
+ };
+
+ static int nfsd_init_fs_context(struct fs_context *fc)
+ {
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(fc->net_ns->user_ns);
+ fc->ops = &nfsd_fs_context_ops;
+ return 0;
}
static void nfsd_umount(struct super_block *sb)
static struct file_system_type nfsd_fs_type = {
.owner = THIS_MODULE,
.name = "nfsd",
- .mount = nfsd_mount,
+ .init_fs_context = nfsd_init_fs_context,
.kill_sb = nfsd_umount,
};
MODULE_ALIAS_FS("nfsd");
static __net_init int nfsd_init_net(struct net *net)
{
int retval;
+ struct vfsmount *mnt;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
retval = nfsd_export_init(net);
goto out_idmap_error;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
+ retval = nfsd_reply_cache_init(nn);
+ if (retval)
+ goto out_drc_error;
nn->nfsd4_lease = 90; /* default lease time */
nn->nfsd4_grace = 90;
nn->somebody_reclaimed = false;
nn->track_reclaim_completes = false;
nn->clverifier_counter = prandom_u32();
- nn->clientid_counter = prandom_u32();
+ nn->clientid_base = prandom_u32();
+ nn->clientid_counter = nn->clientid_base + 1;
nn->s2s_cp_cl_id = nn->clientid_counter++;
atomic_set(&nn->ntf_refcnt, 0);
init_waitqueue_head(&nn->ntf_wq);
+
+ mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
+ if (IS_ERR(mnt)) {
+ retval = PTR_ERR(mnt);
+ goto out_mount_err;
+ }
+ nn->nfsd_mnt = mnt;
return 0;
+out_mount_err:
+ nfsd_reply_cache_shutdown(nn);
+out_drc_error:
+ nfsd_idmap_shutdown(net);
out_idmap_error:
nfsd_export_shutdown(net);
out_export_error:
static __net_exit void nfsd_exit_net(struct net *net)
{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ mntput(nn->nfsd_mnt);
+ nfsd_reply_cache_shutdown(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
retval = nfsd4_init_pnfs();
if (retval)
goto out_free_slabs;
- retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
- if (retval)
- goto out_exit_pnfs;
+ nfsd_fault_inject_init(); /* nfsd fault injection controls */
nfsd_stat_init(); /* Statistics */
- retval = nfsd_reply_cache_init();
- if (retval)
- goto out_free_stat;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
retval = create_proc_exports_entry();
if (retval)
remove_proc_entry("fs/nfs", NULL);
out_free_lockd:
nfsd_lockd_shutdown();
- nfsd_reply_cache_shutdown();
-out_free_stat:
nfsd_stat_shutdown();
nfsd_fault_inject_cleanup();
-out_exit_pnfs:
nfsd4_exit_pnfs();
out_free_slabs:
nfsd4_free_slabs();
static void __exit exit_nfsd(void)
{
- nfsd_reply_cache_shutdown();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
nfsd_stat_shutdown();
+// SPDX-License-Identifier: GPL-2.0-only
/* inode.c: /proc/openprom handling routines
*
* Copyright (C) 1996-1999 Jakub Jelinek (jakub@redhat.com)
#include <linux/types.h>
#include <linux/string.h>
#include <linux/fs.h>
+ #include <linux/fs_context.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
.remount_fs = openprom_remount,
};
- static int openprom_fill_super(struct super_block *s, void *data, int silent)
+ static int openprom_fill_super(struct super_block *s, struct fs_context *fc)
{
struct inode *root_inode;
struct op_inode_info *oi;
return ret;
}
- static struct dentry *openprom_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int openpromfs_get_tree(struct fs_context *fc)
{
- return mount_single(fs_type, flags, data, openprom_fill_super);
+ return get_tree_single(fc, openprom_fill_super);
+ }
+
+ static const struct fs_context_operations openpromfs_context_ops = {
+ .get_tree = openpromfs_get_tree,
+ };
+
+ static int openpromfs_init_fs_context(struct fs_context *fc)
+ {
+ fc->ops = &openpromfs_context_ops;
+ return 0;
}
static struct file_system_type openprom_fs_type = {
.owner = THIS_MODULE,
.name = "openpromfs",
- .mount = openprom_mount,
+ .init_fs_context = openpromfs_init_fs_context,
.kill_sb = kill_anon_super,
};
MODULE_ALIAS_FS("openpromfs");
{
struct proc_fs_context *ctx = fc->fs_private;
- put_user_ns(fc->user_ns);
- fc->user_ns = get_user_ns(ctx->pid_ns->user_ns);
fc->s_fs_info = ctx->pid_ns;
return vfs_get_super(fc, vfs_get_keyed_super, proc_fill_super);
}
{
struct proc_fs_context *ctx = fc->fs_private;
- if (ctx->pid_ns)
- put_pid_ns(ctx->pid_ns);
+ put_pid_ns(ctx->pid_ns);
kfree(ctx);
}
return -ENOMEM;
ctx->pid_ns = get_pid_ns(task_active_pid_ns(current));
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(ctx->pid_ns->user_ns);
fc->fs_private = ctx;
fc->ops = &proc_fs_context_ops;
return 0;
.init_fs_context = proc_init_fs_context,
.parameters = &proc_fs_parameters,
.kill_sb = proc_kill_sb,
- .fs_flags = FS_USERNS_MOUNT,
+ .fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM,
};
void __init proc_root_init(void)
* Locking rules for dentry_operations callbacks are to be found in
* Documentation/filesystems/Locking. Keep it updated!
*
- * FUrther descriptions are found in Documentation/filesystems/vfs.txt.
+ * Further descriptions are found in Documentation/filesystems/vfs.rst.
* Keep it updated too!
*/
*/
extern __printf(4, 5)
char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
- extern char *simple_dname(struct dentry *, char *, int);
extern char *__d_path(const struct path *, const struct path *, char *, int);
extern char *d_absolute_path(const struct path *, char *, int);
* If dentry is on a union/overlay, then return the underlying, real dentry.
* Otherwise return the dentry itself.
*
- * See also: Documentation/filesystems/vfs.txt
+ * See also: Documentation/filesystems/vfs.rst
*/
static inline struct dentry *d_real(struct dentry *dentry,
const struct inode *inode)
atomic_t i_count;
atomic_t i_dio_count;
atomic_t i_writecount;
-#ifdef CONFIG_IMA
+#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
atomic_t i_readcount; /* struct files open RO */
#endif
union {
};
struct lock_manager_operations {
- int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
- unsigned long (*lm_owner_key)(struct file_lock *);
fl_owner_t (*lm_get_owner)(fl_owner_t);
void (*lm_put_owner)(fl_owner_t);
void (*lm_notify)(struct file_lock *); /* unblock callback */
/*
* These flags control the behavior of the remap_file_range function pointer.
* If it is called with len == 0 that means "remap to end of source file".
- * See Documentation/filesystems/vfs.txt for more details about this call.
+ * See Documentation/filesystems/vfs.rst for more details about this call.
*
* REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
* REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
unsigned long, loff_t *, rwf_t);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
loff_t, size_t, unsigned int);
+extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags);
extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t *count,
touch_atime(&file->f_path);
}
+extern int file_modified(struct file *file);
+
int sync_inode(struct inode *inode, struct writeback_control *wbc);
int sync_inode_metadata(struct inode *inode, int wait);
#define FS_BINARY_MOUNTDATA 2
#define FS_HAS_SUBTYPE 4
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
+#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_description *parameters;
#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
- extern struct dentry *mount_ns(struct file_system_type *fs_type,
- int flags, void *data, void *ns, struct user_namespace *user_ns,
- int (*fill_super)(struct super_block *, void *, int));
#ifdef CONFIG_BLOCK
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
struct super_block *sget_fc(struct fs_context *fc,
int (*test)(struct super_block *, struct fs_context *),
int (*set)(struct super_block *, struct fs_context *));
- struct super_block *sget_userns(struct file_system_type *type,
- int (*test)(struct super_block *,void *),
- int (*set)(struct super_block *,void *),
- int flags, struct user_namespace *user_ns,
- void *data);
struct super_block *sget(struct file_system_type *type,
int (*test)(struct super_block *,void *),
int (*set)(struct super_block *,void *),
int flags, void *data);
- extern struct dentry *mount_pseudo_xattr(struct file_system_type *, char *,
- const struct super_operations *ops,
- const struct xattr_handler **xattr,
- const struct dentry_operations *dops,
- unsigned long);
-
- static inline struct dentry *
- mount_pseudo(struct file_system_type *fs_type, char *name,
- const struct super_operations *ops,
- const struct dentry_operations *dops, unsigned long magic)
- {
- return mount_pseudo_xattr(fs_type, name, ops, NULL, dops, magic);
- }
/* Alas, no aliases. Too much hassle with bringing module.h everywhere */
#define fops_get(fops) \
extern int filemap_fdatawait_keep_errors(struct address_space *mapping);
extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
loff_t lend);
+extern int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
+ loff_t start_byte, loff_t end_byte);
static inline int filemap_fdatawait(struct address_space *mapping)
{
return atomic_read(&inode->i_writecount) > 0;
}
-#ifdef CONFIG_IMA
+#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
static inline void i_readcount_dec(struct inode *inode)
{
BUG_ON(!atomic_read(&inode->i_readcount));
extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t *count, unsigned int remap_flags);
+extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
+extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t *count, unsigned int flags);
extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
}
#endif
+int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags,
+ unsigned int flags);
+
+int vfs_ioc_fssetxattr_check(struct inode *inode, const struct fsxattr *old_fa,
+ struct fsxattr *fa);
+
+static inline void simple_fill_fsxattr(struct fsxattr *fa, __u32 xflags)
+{
+ memset(fa, 0, sizeof(*fa));
+ fa->fsx_xflags = xflags;
+}
+
#endif /* _LINUX_FS_H */
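To illustrate how the new fsxattr helpers declared above fit together, here is a minimal sketch of a filesystem's FS_IOC_FSSETXATTR handler; the examplefs_* names are hypothetical, and only simple_fill_fsxattr() and vfs_ioc_fssetxattr_check() come from this header:

static int examplefs_ioc_fssetxattr(struct file *file, struct fsxattr __user *arg)
{
	struct inode *inode = file_inode(file);
	struct fsxattr fa, old_fa;
	int err;

	if (copy_from_user(&fa, arg, sizeof(fa)))
		return -EFAULT;

	inode_lock(inode);
	/* Describe the current flags so the VFS can vet the transition. */
	simple_fill_fsxattr(&old_fa, examplefs_iflags_to_xflags(inode));
	err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
	if (!err)
		err = examplefs_apply_xflags(inode, fa.fsx_xflags);
	inode_unlock(inode);
	return err;
}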
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Filesystem superblock creation and reconfiguration context.
*
* Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
*/
#ifndef _LINUX_FS_CONTEXT_H
* Superblock creation fills in ->root whereas reconfiguration begins with this
* already set.
*
- * See Documentation/filesystems/mounting.txt
+ * See Documentation/filesystems/mount_api.txt
*/
struct fs_context {
const struct fs_context_operations *ops;
void *s_fs_info; /* Proposed s_fs_info */
unsigned int sb_flags; /* Proposed superblock flags (SB_*) */
unsigned int sb_flags_mask; /* Superblock flags that were changed */
+ unsigned int s_iflags; /* OR'd with sb->s_iflags */
unsigned int lsm_flags; /* Information flags from the fs to the LSM */
enum fs_context_purpose purpose:8;
enum fs_context_phase phase:8; /* The phase the context is in */
enum vfs_get_super_keying keying,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc));
+ extern int get_tree_nodev(struct fs_context *fc,
+ int (*fill_super)(struct super_block *sb,
+ struct fs_context *fc));
+ extern int get_tree_single(struct fs_context *fc,
+ int (*fill_super)(struct super_block *sb,
+ struct fs_context *fc));
extern const struct file_operations fscontext_fops;
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* syscalls.h - Linux syscall interfaces (non-arch-specific)
*
* Copyright (c) 2004 Randy Dunlap
* Copyright (c) 2004 Open Source Development Labs
- *
- * This file is released under the GPLv2.
- * See the file COPYING for more details.
*/
#ifndef _LINUX_SYSCALLS_H
struct rseq;
union bpf_attr;
struct io_uring_params;
+struct clone_args;
#include <linux/types.h>
#include <linux/aio_abi.h>
if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS),
"Invalid address limit on user-mode return"))
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
#ifdef TIF_FSCHECK
clear_thread_flag(TIF_FSCHECK);
int __user *, unsigned long);
#endif
#endif
+
+asmlinkage long sys_clone3(struct clone_args __user *uargs, size_t size);
+
asmlinkage long sys_execve(const char __user *filename,
const char __user *const __user *argv,
const char __user *const __user *envp);
struct old_timex32 __user *tx);
asmlinkage long sys_syncfs(int fd);
asmlinkage long sys_setns(int fd, int nstype);
+asmlinkage long sys_pidfd_open(pid_t pid, unsigned int flags);
asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg,
unsigned int vlen, unsigned flags);
asmlinkage long sys_process_vm_readv(pid_t pid,
* the ksys_xyzyyz() functions prototyped below.
*/
- int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type,
- unsigned long flags, void __user *data);
+ int ksys_mount(const char __user *dev_name, const char __user *dir_name,
+ const char __user *type, unsigned long flags, void __user *data);
int ksys_umount(char __user *name, int flags);
int ksys_dup(unsigned int fildes);
int ksys_chroot(const char __user *filename);
#define UDF_SUPER_MAGIC 0x15013346
#define BALLOON_KVM_MAGIC 0x13661366
#define ZSMALLOC_MAGIC 0x58295829
+#define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */
+ #define Z3FOLD_MAGIC 0x33
#endif /* __LINUX_MAGIC_H__ */
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/ctype.h>
static struct dentry *rootfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- static unsigned long once;
void *fill = ramfs_fill_super;
- if (test_and_set_bit(0, &once))
- return ERR_PTR(-ENODEV);
-
if (IS_ENABLED(CONFIG_TMPFS) && is_tmpfs)
fill = shmem_fill_super;
return mount_nodev(fs_type, flags, data, fill);
}
- static struct file_system_type rootfs_fs_type = {
+ struct file_system_type rootfs_fs_type = {
.name = "rootfs",
.mount = rootfs_mount,
.kill_sb = kill_litter_super,
};
- int __init init_rootfs(void)
+ void __init init_rootfs(void)
{
- int err = register_filesystem(&rootfs_fs_type);
-
- if (err)
- return err;
-
if (IS_ENABLED(CONFIG_TMPFS) && !saved_root_name[0] &&
- (!root_fs_names || strstr(root_fs_names, "tmpfs"))) {
- err = shmem_init();
+ (!root_fs_names || strstr(root_fs_names, "tmpfs")))
is_tmpfs = true;
- } else {
- err = init_ramfs_fs();
- }
-
- if (err)
- unregister_filesystem(&rootfs_fs_type);
-
- return err;
}
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/init/main.c
*
}
#endif
+/* Report memory auto-initialization states for this boot. */
+static void __init report_meminit(void)
+{
+ const char *stack;
+
+ if (IS_ENABLED(CONFIG_INIT_STACK_ALL))
+ stack = "all";
+ else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL))
+ stack = "byref_all";
+ else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF))
+ stack = "byref";
+ else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_USER))
+ stack = "__user";
+ else
+ stack = "off";
+
+ pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s\n",
+ stack, want_init_on_alloc(GFP_KERNEL) ? "on" : "off",
+ want_init_on_free() ? "on" : "off");
+ if (want_init_on_free())
+ pr_info("mem auto-init: clearing system memory may take some time...\n");
+}
+
/*
* Set up kernel memory allocators
*/
* bigger than MAX_ORDER unless SPARSEMEM.
*/
page_ext_init_flatmem();
+ report_meminit();
mem_init();
kmem_cache_init();
pgtable_init();
static void __init do_basic_setup(void)
{
cpuset_init_smp();
- shmem_init();
driver_init();
init_irq_proc();
do_ctors();
{
struct mqueue_fs_context *ctx = fc->fs_private;
- put_user_ns(fc->user_ns);
- fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
fc->s_fs_info = ctx->ipc_ns;
return vfs_get_super(fc, vfs_get_keyed_super, mqueue_fill_super);
}
{
struct mqueue_fs_context *ctx = fc->fs_private;
- if (ctx->ipc_ns)
- put_ipc_ns(ctx->ipc_ns);
+ put_ipc_ns(ctx->ipc_ns);
kfree(ctx);
}
return -ENOMEM;
ctx->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns);
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
fc->fs_private = ctx;
fc->ops = &mqueue_fs_context_ops;
return 0;
ctx = fc->fs_private;
put_ipc_ns(ctx->ipc_ns);
ctx->ipc_ns = get_ipc_ns(ns);
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
mnt = fc_mount(fc);
put_fs_context(fc);
{
struct mqueue_inode_info *info;
struct user_struct *user;
- unsigned long mq_bytes, mq_treesize;
struct ipc_namespace *ipc_ns;
struct msg_msg *msg, *nmsg;
LIST_HEAD(tmp_msg);
free_msg(msg);
}
- /* Total amount of bytes accounted for the mqueue */
- mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
- min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
- sizeof(struct posix_msg_tree_node);
-
- mq_bytes = mq_treesize + (info->attr.mq_maxmsg *
- info->attr.mq_msgsize);
-
user = info->user;
if (user) {
+ unsigned long mq_bytes, mq_treesize;
+
+ /* Total amount of bytes accounted for the mqueue */
+ mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
+ min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
+ sizeof(struct posix_msg_tree_node);
+
+ mq_bytes = mq_treesize + (info->attr.mq_maxmsg *
+ info->attr.mq_msgsize);
+
spin_lock(&mq_lock);
user->mq_bytes -= mq_bytes;
/*
*/
static DEFINE_SPINLOCK(cgroup_file_kn_lock);
-struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
+DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
#define cgroup_assert_mutex_or_rcu_locked() \
RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
static int cgroup_apply_control(struct cgroup *cgrp);
static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
-static void css_task_iter_advance(struct css_task_iter *it);
+static void css_task_iter_skip(struct css_task_iter *it,
+ struct task_struct *task);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
struct cgroup_subsys *ss);
.dom_cset = &init_css_set,
.tasks = LIST_HEAD_INIT(init_css_set.tasks),
.mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
+ .dying_tasks = LIST_HEAD_INIT(init_css_set.dying_tasks),
.task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
.threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
.cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
cgroup_update_populated(link->cgrp, populated);
}
+/*
+ * @task is leaving, advance task iterators which are pointing to it so
+ * that they can resume at the next position. Advancing an iterator might
+ * remove it from the list, use safe walk. See css_task_iter_skip() for
+ * details.
+ */
+static void css_set_skip_task_iters(struct css_set *cset,
+ struct task_struct *task)
+{
+ struct css_task_iter *it, *pos;
+
+ list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node)
+ css_task_iter_skip(it, task);
+}
+
/**
* css_set_move_task - move a task from one css_set to another
* @task: task being moved
css_set_update_populated(to_cset, true);
if (from_cset) {
- struct css_task_iter *it, *pos;
-
WARN_ON_ONCE(list_empty(&task->cg_list));
- /*
- * @task is leaving, advance task iterators which are
- * pointing to it so that they can resume at the next
- * position. Advancing an iterator might remove it from
- * the list, use safe walk. See css_task_iter_advance*()
- * for details.
- */
- list_for_each_entry_safe(it, pos, &from_cset->task_iters,
- iters_node)
- if (it->task_pos == &task->cg_list)
- css_task_iter_advance(it);
-
+ css_set_skip_task_iters(from_cset, task);
list_del_init(&task->cg_list);
if (!css_set_populated(from_cset))
css_set_update_populated(from_cset, false);
cset->dom_cset = cset;
INIT_LIST_HEAD(&cset->tasks);
INIT_LIST_HEAD(&cset->mg_tasks);
+ INIT_LIST_HEAD(&cset->dying_tasks);
INIT_LIST_HEAD(&cset->task_iters);
INIT_LIST_HEAD(&cset->threaded_csets);
INIT_HLIST_NODE(&cset->hlist);
enum cgroup2_param {
Opt_nsdelegate,
+ Opt_memory_localevents,
nr__cgroup2_params
};
static const struct fs_parameter_spec cgroup2_param_specs[] = {
- fsparam_flag ("nsdelegate", Opt_nsdelegate),
+ fsparam_flag("nsdelegate", Opt_nsdelegate),
+ fsparam_flag("memory_localevents", Opt_memory_localevents),
{}
};
case Opt_nsdelegate:
ctx->flags |= CGRP_ROOT_NS_DELEGATE;
return 0;
+ case Opt_memory_localevents:
+ ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
+ return 0;
}
return -EINVAL;
}
cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE;
else
cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
+
+ if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
+ cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
+ else
+ cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_LOCAL_EVENTS;
}
}
{
if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
seq_puts(seq, ",nsdelegate");
+ if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
+ seq_puts(seq, ",memory_localevents");
return 0;
}
fc->ops = &cgroup_fs_context_ops;
else
fc->ops = &cgroup1_fs_context_ops;
- if (fc->user_ns)
- put_user_ns(fc->user_ns);
+ put_user_ns(fc->user_ns);
fc->user_ns = get_user_ns(ctx->ns->user_ns);
fc->global = true;
return 0;
.fs_flags = FS_USERNS_MOUNT,
};
+ #ifdef CONFIG_CPUSETS
+ static const struct fs_context_operations cpuset_fs_context_ops = {
+ .get_tree = cgroup1_get_tree,
+ .free = cgroup_fs_context_free,
+ };
+
+ /*
+ * This is ugly, but preserves the userspace API for existing cpuset
+ * users. If someone tries to mount the "cpuset" filesystem, we
+ * silently switch it to mount "cgroup" instead
+ */
+ static int cpuset_init_fs_context(struct fs_context *fc)
+ {
+ char *agent = kstrdup("/sbin/cpuset_release_agent", GFP_USER);
+ struct cgroup_fs_context *ctx;
+ int err;
+
+ err = cgroup_init_fs_context(fc);
+ if (err) {
+ kfree(agent);
+ return err;
+ }
+
+ fc->ops = &cpuset_fs_context_ops;
+
+ ctx = cgroup_fc2context(fc);
+ ctx->subsys_mask = 1 << cpuset_cgrp_id;
+ ctx->flags |= CGRP_ROOT_NOPREFIX;
+ ctx->release_agent = agent;
+
+ get_filesystem(&cgroup_fs_type);
+ put_filesystem(fc->fs_type);
+ fc->fs_type = &cgroup_fs_type;
+
+ return 0;
+ }
+
+ static struct file_system_type cpuset_fs_type = {
+ .name = "cpuset",
+ .init_fs_context = cpuset_init_fs_context,
+ .fs_flags = FS_USERNS_MOUNT,
+ };
+ #endif
+
int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
struct cgroup_namespace *ns)
{
return NULL;
}
+EXPORT_SYMBOL_GPL(css_next_descendant_pre);
/**
* css_rightmost_descendant - return the rightmost descendant of a css
it->task_pos = NULL;
return;
}
- } while (!css_set_populated(cset));
+ } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
if (!list_empty(&cset->tasks))
it->task_pos = cset->tasks.next;
- else
+ else if (!list_empty(&cset->mg_tasks))
it->task_pos = cset->mg_tasks.next;
+ else
+ it->task_pos = cset->dying_tasks.next;
it->tasks_head = &cset->tasks;
it->mg_tasks_head = &cset->mg_tasks;
+ it->dying_tasks_head = &cset->dying_tasks;
/*
* We don't keep css_sets locked across iteration steps and thus
list_add(&it->iters_node, &cset->task_iters);
}
+static void css_task_iter_skip(struct css_task_iter *it,
+ struct task_struct *task)
+{
+ lockdep_assert_held(&css_set_lock);
+
+ if (it->task_pos == &task->cg_list) {
+ it->task_pos = it->task_pos->next;
+ it->flags |= CSS_TASK_ITER_SKIPPED;
+ }
+}
+
static void css_task_iter_advance(struct css_task_iter *it)
{
- struct list_head *next;
+ struct task_struct *task;
lockdep_assert_held(&css_set_lock);
repeat:
* consumed first and then ->mg_tasks. After ->mg_tasks,
* we move onto the next cset.
*/
- next = it->task_pos->next;
-
- if (next == it->tasks_head)
- next = it->mg_tasks_head->next;
+ if (it->flags & CSS_TASK_ITER_SKIPPED)
+ it->flags &= ~CSS_TASK_ITER_SKIPPED;
+ else
+ it->task_pos = it->task_pos->next;
- if (next == it->mg_tasks_head)
+ if (it->task_pos == it->tasks_head)
+ it->task_pos = it->mg_tasks_head->next;
+ if (it->task_pos == it->mg_tasks_head)
+ it->task_pos = it->dying_tasks_head->next;
+ if (it->task_pos == it->dying_tasks_head)
css_task_iter_advance_css_set(it);
- else
- it->task_pos = next;
} else {
/* called from start, proceed to the first cset */
css_task_iter_advance_css_set(it);
}
- /* if PROCS, skip over tasks which aren't group leaders */
- if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
- !thread_group_leader(list_entry(it->task_pos, struct task_struct,
- cg_list)))
- goto repeat;
+ if (!it->task_pos)
+ return;
+
+ task = list_entry(it->task_pos, struct task_struct, cg_list);
+
+ if (it->flags & CSS_TASK_ITER_PROCS) {
+ /* if PROCS, skip over tasks which aren't group leaders */
+ if (!thread_group_leader(task))
+ goto repeat;
+
+ /* and dying leaders w/o live member threads */
+ if (!atomic_read(&task->signal->live))
+ goto repeat;
+ } else {
+ /* skip all dying ones */
+ if (task->flags & PF_EXITING)
+ goto repeat;
+ }
}
/**
spin_lock_irq(&css_set_lock);
+ /* @it may be half-advanced by skips, finish advancing */
+ if (it->flags & CSS_TASK_ITER_SKIPPED)
+ css_task_iter_advance(it);
+
if (it->task_pos) {
it->cur_task = list_entry(it->task_pos, struct task_struct,
cg_list);
if (cgrp->kn)
RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
NULL);
-
- cgroup_bpf_put(cgrp);
}
mutex_unlock(&cgroup_mutex);
cgroup1_check_for_release(parent);
+ cgroup_bpf_offline(cgrp);
+
/* put the base reference */
percpu_ref_kill(&cgrp->self.refcnt);
int ssid;
BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
- BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
WARN_ON(register_filesystem(&cgroup_fs_type));
WARN_ON(register_filesystem(&cgroup2_fs_type));
WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show));
+ #ifdef CONFIG_CPUSETS
+ WARN_ON(register_filesystem(&cpuset_fs_type));
+ #endif
return 0;
}
if (!list_empty(&tsk->cg_list)) {
spin_lock_irq(&css_set_lock);
css_set_move_task(tsk, cset, NULL, false);
+ list_add_tail(&tsk->cg_list, &cset->dying_tasks);
cset->nr_tasks--;
WARN_ON_ONCE(cgroup_task_frozen(tsk));
do_each_subsys_mask(ss, ssid, have_release_callback) {
ss->release(task);
} while_each_subsys_mask();
+
+ if (use_task_css_set_links) {
+ spin_lock_irq(&css_set_lock);
+ css_set_skip_task_iters(task_css_set(task), task);
+ list_del_init(&task->cg_list);
+ spin_unlock_irq(&css_set_lock);
+ }
}
void cgroup_free(struct task_struct *task)
}
EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
+static u64 power_of_ten(int power)
+{
+ u64 v = 1;
+ while (power--)
+ v *= 10;
+ return v;
+}
+
+/**
+ * cgroup_parse_float - parse a floating number
+ * @input: input string
+ * @dec_shift: number of decimal digits to shift
+ * @v: output
+ *
+ * Parse a decimal floating point number in @input and store the result in
+ * @v with decimal point right shifted @dec_shift times. For example, if
+ * @input is "12.3456" and @dec_shift is 3, *@v will be set to 12345.
+ * Returns 0 on success, -errno otherwise.
+ *
+ * There's nothing cgroup specific about this function except that it's
+ * currently the only user.
+ */
+int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)
+{
+ s64 whole, frac = 0;
+ int fstart = 0, fend = 0, flen;
+
+ if (!sscanf(input, "%lld.%n%lld%n", &whole, &fstart, &frac, &fend))
+ return -EINVAL;
+ if (frac < 0)
+ return -EINVAL;
+
+ flen = fend > fstart ? fend - fstart : 0;
+ if (flen < dec_shift)
+ frac *= power_of_ten(dec_shift - flen);
+ else
+ frac = DIV_ROUND_CLOSEST_ULL(frac, power_of_ten(flen - dec_shift));
+
+ *v = whole * power_of_ten(dec_shift) + frac;
+ return 0;
+}
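As a quick illustration of the rounding and padding behaviour (hypothetical caller, values chosen for the example, not part of the patch):

static void example_parse_float_usage(void)
{
	s64 v;

	/* "3456" rounded to two decimal digits is 35, so v == 12 * 100 + 35 */
	WARN_ON(cgroup_parse_float("12.3456", 2, &v) || v != 1235);

	/* "3" padded to two decimal digits is 30, so v == 12 * 100 + 30 */
	WARN_ON(cgroup_parse_float("12.3", 2, &v) || v != 1230);
}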
+
/*
* sock->sk_cgrp_data handling. For more info, see sock_cgroup_data
* definition in cgroup-defs.h.
* Don't use cgroup_get_live().
*/
cgroup_get(sock_cgroup_ptr(skcd));
+ cgroup_bpf_get(sock_cgroup_ptr(skcd));
return;
}
cset = task_css_set(current);
if (likely(cgroup_tryget(cset->dfl_cgrp))) {
skcd->val = (unsigned long)cset->dfl_cgrp;
+ cgroup_bpf_get(cset->dfl_cgrp);
break;
}
cpu_relax();
void cgroup_sk_free(struct sock_cgroup_data *skcd)
{
- cgroup_put(sock_cgroup_ptr(skcd));
+ struct cgroup *cgrp = sock_cgroup_ptr(skcd);
+
+ cgroup_bpf_put(cgrp);
+ cgroup_put(cgrp);
}
#endif /* CONFIG_SOCK_CGROUP_DATA */
static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
- return snprintf(buf, PAGE_SIZE, "nsdelegate\n");
+ return snprintf(buf, PAGE_SIZE, "nsdelegate\nmemory_localevents\n");
}
static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group);
}
subsys_initcall(cgroup_sysfs_init);
+
#endif /* CONFIG_SYSFS */
(cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
}
- /*
- * This is ugly, but preserves the userspace API for existing cpuset
- * users. If someone tries to mount the "cpuset" filesystem, we
- * silently switch it to mount "cgroup" instead
- */
- static int cpuset_get_tree(struct fs_context *fc)
- {
- struct file_system_type *cgroup_fs;
- struct fs_context *new_fc;
- int ret;
-
- cgroup_fs = get_fs_type("cgroup");
- if (!cgroup_fs)
- return -ENODEV;
-
- new_fc = fs_context_for_mount(cgroup_fs, fc->sb_flags);
- if (IS_ERR(new_fc)) {
- ret = PTR_ERR(new_fc);
- } else {
- static const char agent_path[] = "/sbin/cpuset_release_agent";
- ret = vfs_parse_fs_string(new_fc, "cpuset", NULL, 0);
- if (!ret)
- ret = vfs_parse_fs_string(new_fc, "noprefix", NULL, 0);
- if (!ret)
- ret = vfs_parse_fs_string(new_fc, "release_agent",
- agent_path, sizeof(agent_path) - 1);
- if (!ret)
- ret = vfs_get_tree(new_fc);
- if (!ret) { /* steal the result */
- fc->root = new_fc->root;
- new_fc->root = NULL;
- }
- put_fs_context(new_fc);
- }
- put_filesystem(cgroup_fs);
- return ret;
- }
-
- static const struct fs_context_operations cpuset_fs_context_ops = {
- .get_tree = cpuset_get_tree,
- };
-
- static int cpuset_init_fs_context(struct fs_context *fc)
- {
- fc->ops = &cpuset_fs_context_ops;
- return 0;
- }
-
- static struct file_system_type cpuset_fs_type = {
- .name = "cpuset",
- .init_fs_context = cpuset_init_fs_context,
- };
-
/*
* Return in pmask the portion of a cpusets's cpus_allowed that
* are online. If none are online, walk up the cpuset hierarchy
* load balancing domains (sched domains) as specified by that partial
* partition.
*
- * See "What is sched_load_balance" in Documentation/cgroup-v1/cpusets.txt
+ * See "What is sched_load_balance" in Documentation/admin-guide/cgroup-v1/cpusets.rst
* for a background explanation of this.
*
* Does not return errors, on the theory that the callers of this
if (task_css_is_root(task, cpuset_cgrp_id))
return;
- set_cpus_allowed_ptr(task, &current->cpus_allowed);
+ set_cpus_allowed_ptr(task, current->cpus_ptr);
task->mems_allowed = current->mems_allowed;
}
/**
* cpuset_init - initialize cpusets at system boot
*
- * Description: Initialize top_cpuset and the cpuset internal file system,
+ * Description: Initialize top_cpuset
**/
int __init cpuset_init(void)
{
- int err = 0;
-
BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL));
set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
top_cpuset.relax_domain_level = -1;
- err = register_filesystem(&cpuset_fs_type);
- if (err < 0)
- return err;
-
BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
return 0;
spin_unlock_irqrestore(&callback_lock, flags);
}
+/**
+ * cpuset_cpus_allowed_fallback - final fallback before complete catastrophe.
+ * @tsk: pointer to task_struct with which the scheduler is struggling
+ *
+ * Description: In the case that the scheduler cannot find an allowed cpu in
+ * tsk->cpus_allowed, we fall back to task_cs(tsk)->cpus_allowed. In legacy
+ * mode however, this value is the same as task_cs(tsk)->effective_cpus,
+ * which will not contain a sane cpumask during cases such as cpu hotplugging.
+ * This is the absolute last resort for the scheduler and it is only used if
+ * _every_ other avenue has been traveled.
+ **/
+
void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
{
rcu_read_lock();
- do_set_cpus_allowed(tsk, task_cs(tsk)->effective_cpus);
+ do_set_cpus_allowed(tsk, is_in_v2_mode() ?
+ task_cs(tsk)->cpus_allowed : cpu_possible_mask);
rcu_read_unlock();
/*
static int shmem_huge __read_mostly;
-#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
+#if defined(CONFIG_SYSFS)
static int shmem_parse_huge(const char *str)
{
if (!strcmp(str, "never"))
return SHMEM_HUGE_FORCE;
return -EINVAL;
}
+#endif
+#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
static const char *shmem_format_huge(int huge)
{
switch (huge) {
if (xas_error(&xas))
goto unlock;
next:
- xas_store(&xas, page);
+ xas_store(&xas, page + i);
if (++i < nr) {
xas_next(&xas);
goto next;
{
int error;
- /* If rootfs called this, don't re-init */
- if (shmem_inode_cachep)
- return 0;
-
shmem_init_inodecache();
error = register_filesystem(&shmem_fs_type);
loff_t i_size;
pgoff_t off;
+ if ((vma->vm_flags & VM_NOHUGEPAGE) ||
+ test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
+ return false;
if (shmem_huge == SHMEM_HUGE_FORCE)
return true;
if (shmem_huge == SHMEM_HUGE_DENY)
+// SPDX-License-Identifier: GPL-2.0-only
/*
* z3fold.c
*
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
- #include <linux/dcache.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/compaction.h>
#include <linux/percpu.h>
#include <linux/mount.h>
+ #include <linux/pseudo_fs.h>
#include <linux/fs.h>
#include <linux/preempt.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zpool.h>
+ #include <linux/magic.h>
/*
* NCHUNKS_ORDER determines the internal allocation granularity, effectively
* @refcount: reference count for the z3fold page
* @work: work_struct for page layout optimization
* @slots: pointer to the structure holding buddy slots
+ * @pool: pointer to the containing pool
* @cpu: CPU which this page "belongs" to
* @first_chunks: the size of the first buddy in chunks, 0 if free
* @middle_chunks: the size of the middle buddy in chunks, 0 if free
struct kref refcount;
struct work_struct work;
struct z3fold_buddy_slots *slots;
+ struct z3fold_pool *pool;
short cpu;
unsigned short first_chunks;
unsigned short middle_chunks;
static void compact_page_work(struct work_struct *w);
-static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool)
+static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool,
+ gfp_t gfp)
{
- struct z3fold_buddy_slots *slots = kmem_cache_alloc(pool->c_handle,
- GFP_KERNEL);
+ struct z3fold_buddy_slots *slots;
+
+ slots = kmem_cache_alloc(pool->c_handle,
+ (gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE)));
if (slots) {
memset(slots->slot, 0, sizeof(slots->slot));
}
}
- static struct dentry *z3fold_do_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int z3fold_init_fs_context(struct fs_context *fc)
{
- static const struct dentry_operations ops = {
- .d_dname = simple_dname,
- };
-
- return mount_pseudo(fs_type, "z3fold:", NULL, &ops, 0x33);
+ return init_pseudo(fc, Z3FOLD_MAGIC) ? 0 : -ENOMEM;
}
static struct file_system_type z3fold_fs = {
.name = "z3fold",
- .mount = z3fold_do_mount,
+ .init_fs_context = z3fold_init_fs_context,
.kill_sb = kill_anon_super,
};
/* Initializes the z3fold header of a newly allocated z3fold page */
static struct z3fold_header *init_z3fold_page(struct page *page,
- struct z3fold_pool *pool)
+ struct z3fold_pool *pool, gfp_t gfp)
{
struct z3fold_header *zhdr = page_address(page);
- struct z3fold_buddy_slots *slots = alloc_slots(pool);
+ struct z3fold_buddy_slots *slots = alloc_slots(pool, gfp);
if (!slots)
return NULL;
zhdr->start_middle = 0;
zhdr->cpu = -1;
zhdr->slots = slots;
+ zhdr->pool = pool;
INIT_LIST_HEAD(&zhdr->buddy);
INIT_WORK(&zhdr->work, compact_page_work);
return zhdr;
static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr)
{
- return slots_to_pool(zhdr->slots);
+ return zhdr->pool;
}
static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
enum buddy bud;
bool can_sleep = gfpflags_allow_blocking(gfp);
- if (!size || (gfp & __GFP_HIGHMEM))
+ if (!size)
return -EINVAL;
if (size > PAGE_SIZE)
if (!page)
return -ENOMEM;
- zhdr = init_z3fold_page(page, pool);
+ zhdr = init_z3fold_page(page, pool, gfp);
if (!zhdr) {
__free_page(page);
return -ENOMEM;
set_bit(PAGE_HEADLESS, &page->private);
goto headless;
}
- __SetPageMovable(page, pool->inode->i_mapping);
+ if (can_sleep) {
+ lock_page(page);
+ __SetPageMovable(page, pool->inode->i_mapping);
+ unlock_page(page);
+ } else {
+ if (trylock_page(page)) {
+ __SetPageMovable(page, pool->inode->i_mapping);
+ unlock_page(page);
+ }
+ }
z3fold_page_lock(zhdr);
found:
VM_BUG_ON_PAGE(!PageMovable(page), page);
VM_BUG_ON_PAGE(!PageIsolated(page), page);
+ VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
zhdr = page_address(page);
pool = zhdr_to_pool(zhdr);
- if (!trylock_page(page))
- return -EAGAIN;
-
if (!z3fold_page_trylock(zhdr)) {
- unlock_page(page);
return -EAGAIN;
}
if (zhdr->mapped_count != 0) {
z3fold_page_unlock(zhdr);
- unlock_page(page);
return -EBUSY;
}
+ if (work_pending(&zhdr->work)) {
+ z3fold_page_unlock(zhdr);
+ return -EAGAIN;
+ }
new_zhdr = page_address(newpage);
memcpy(new_zhdr, zhdr, PAGE_SIZE);
newpage->private = page->private;
page->private = 0;
z3fold_page_unlock(zhdr);
spin_lock_init(&new_zhdr->page_lock);
+ INIT_WORK(&new_zhdr->work, compact_page_work);
+ /*
+ * z3fold_page_isolate() ensures that new_zhdr->buddy is empty,
+ * so we only have to reinitialize it.
+ */
+ INIT_LIST_HEAD(&new_zhdr->buddy);
new_mapping = page_mapping(page);
__ClearPageMovable(page);
ClearPagePrivate(page);
queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
page_mapcount_reset(page);
- unlock_page(page);
put_page(page);
return 0;
}
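The migrate callback above is only invoked for pages previously flagged with __SetPageMovable(); in this kernel series the movable-page machinery reaches driver callbacks through the backing inode's address_space_operations. A sketch of that wiring, assuming the isolate and putback callbacks this file also defines (they are not shown in the hunk):

/* Sketch, assuming the contemporary movable-page operations. */
static const struct address_space_operations example_z3fold_aops = {
	.isolate_page	= z3fold_page_isolate,
	.migratepage	= z3fold_page_migrate,
	.putback_page	= z3fold_page_putback,
};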
#include <linux/zsmalloc.h>
#include <linux/zpool.h>
#include <linux/mount.h>
+ #include <linux/pseudo_fs.h>
#include <linux/migrate.h>
#include <linux/pagemap.h>
#include <linux/fs.h>
}
zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
- if (!zs_stat_root)
- pr_warn("debugfs 'zsmalloc' stat dir creation failed\n");
}
static void __exit zs_stat_exit(void)
static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
{
- struct dentry *entry;
-
if (!zs_stat_root) {
pr_warn("no root stat dir, not creating <%s> stat dir\n", name);
return;
}
- entry = debugfs_create_dir(name, zs_stat_root);
- if (!entry) {
- pr_warn("debugfs dir <%s> creation failed\n", name);
- return;
- }
- pool->stat_dentry = entry;
-
- entry = debugfs_create_file("classes", S_IFREG | 0444,
- pool->stat_dentry, pool,
- &zs_stats_size_fops);
- if (!entry) {
- pr_warn("%s: debugfs file entry <%s> creation failed\n",
- name, "classes");
- debugfs_remove_recursive(pool->stat_dentry);
- pool->stat_dentry = NULL;
- }
+ pool->stat_dentry = debugfs_create_dir(name, zs_stat_root);
+
+ debugfs_create_file("classes", S_IFREG | 0444, pool->stat_dentry, pool,
+ &zs_stats_size_fops);
}
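The simplification follows the current debugfs convention: creation helpers are not meant to be error-checked by their callers, since a failed or error-valued parent is simply passed along and later calls degrade to no-ops. A minimal, hypothetical example of the same idiom:

/* Illustrative only: no return-value checks on debugfs creation. */
static struct dentry *example_dir;
static u32 example_counter;

static void example_debugfs_init(void)
{
	example_dir = debugfs_create_dir("example", NULL);
	debugfs_create_u32("counter", 0444, example_dir, &example_counter);
}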
static void zs_pool_stat_destroy(struct zs_pool *pool)
} while ((page = get_next_page(page)) != NULL);
}
- static struct dentry *zs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int zs_init_fs_context(struct fs_context *fc)
{
- static const struct dentry_operations ops = {
- .d_dname = simple_dname,
- };
-
- return mount_pseudo(fs_type, "zsmalloc:", NULL, &ops, ZSMALLOC_MAGIC);
+ return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM;
}
static struct file_system_type zsmalloc_fs = {
.name = "zsmalloc",
- .mount = zs_mount,
+ .init_fs_context = zs_init_fs_context,
.kill_sb = kill_anon_super,
};
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* NET An implementation of the SOCKET network access protocol.
*
* Tigran Aivazian : Made listen(2) backlog sanity checks
* protocol-independent
*
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- *
* This module is effectively the top level interface to the BSD socket
* paradigm.
*
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/mount.h>
+ #include <linux/pseudo_fs.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <net/busy_poll.h>
#include <linux/errqueue.h>
-/* proto_ops for ipv4 and ipv6 use the same {recv,send}msg function */
-#if IS_ENABLED(CONFIG_INET)
-#define INDIRECT_CALL_INET4(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
-#else
-#define INDIRECT_CALL_INET4(f, f1, ...) f(__VA_ARGS__)
-#endif
-
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
static struct inode *sock_alloc_inode(struct super_block *sb)
{
struct socket_alloc *ei;
- struct socket_wq *wq;
ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
- wq = kmalloc(sizeof(*wq), GFP_KERNEL);
- if (!wq) {
- kmem_cache_free(sock_inode_cachep, ei);
- return NULL;
- }
- init_waitqueue_head(&wq->wait);
- wq->fasync_list = NULL;
- wq->flags = 0;
- ei->socket.wq = wq;
+ init_waitqueue_head(&ei->socket.wq.wait);
+ ei->socket.wq.fasync_list = NULL;
+ ei->socket.wq.flags = 0;
ei->socket.state = SS_UNCONNECTED;
ei->socket.flags = 0;
return &ei->vfs_inode;
}
-static void sock_destroy_inode(struct inode *inode)
+static void sock_free_inode(struct inode *inode)
{
struct socket_alloc *ei;
ei = container_of(inode, struct socket_alloc, vfs_inode);
- kfree_rcu(ei->socket.wq, rcu);
kmem_cache_free(sock_inode_cachep, ei);
}
static const struct super_operations sockfs_ops = {
.alloc_inode = sock_alloc_inode,
- .destroy_inode = sock_destroy_inode,
+ .free_inode = sock_free_inode,
.statfs = simple_statfs,
};
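Switching from ->destroy_inode to ->free_inode goes hand in hand with embedding the wait queue in struct socket: ->free_inode is called from the RCU callback that frees the inode, so state embedded in the containing object no longer needs its own kfree_rcu(). The generic shape of the pattern, with hypothetical names:

/* Sketch of the ->alloc_inode / ->free_inode pairing for an embedded inode. */
struct example_inode {
	unsigned long	private_state;
	struct inode	vfs_inode;
};

static struct kmem_cache *example_inode_cachep;

static struct inode *example_alloc_inode(struct super_block *sb)
{
	struct example_inode *ei;

	ei = kmem_cache_alloc(example_inode_cachep, GFP_KERNEL);
	return ei ? &ei->vfs_inode : NULL;
}

static void example_free_inode(struct inode *inode)
{
	/* runs after an RCU grace period, unlike ->destroy_inode */
	kmem_cache_free(example_inode_cachep,
			container_of(inode, struct example_inode, vfs_inode));
}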
NULL
};
- static struct dentry *sockfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int sockfs_init_fs_context(struct fs_context *fc)
{
- return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
- sockfs_xattr_handlers,
- &sockfs_dentry_operations, SOCKFS_MAGIC);
+ struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->ops = &sockfs_ops;
+ ctx->dops = &sockfs_dentry_operations;
+ ctx->xattr = sockfs_xattr_handlers;
+ return 0;
}
static struct vfsmount *sock_mnt __read_mostly;
static struct file_system_type sock_fs_type = {
.name = "sockfs",
- .mount = sockfs_mount,
+ .init_fs_context = sockfs_init_fs_context,
.kill_sb = kill_anon_super,
};
}
newfile = sock_alloc_file(sock, flags, NULL);
- if (likely(!IS_ERR(newfile))) {
+ if (!IS_ERR(newfile)) {
fd_install(fd, newfile);
return fd;
}
module_put(owner);
}
- if (sock->wq->fasync_list)
+ if (sock->wq.fasync_list)
pr_err("%s: fasync list not empty!\n", __func__);
if (!sock->file) {
}
EXPORT_SYMBOL(__sock_tx_timestamp);
-/**
- * sock_sendmsg - send a message through @sock
- * @sock: socket
- * @msg: message to send
- *
- * Sends @msg through @sock, passing through LSM.
- * Returns the number of bytes sent, or an error code.
- */
INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
size_t));
+INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
+ size_t));
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
- int ret = INDIRECT_CALL_INET4(sock->ops->sendmsg, inet_sendmsg, sock,
- msg, msg_data_left(msg));
+ int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
+ inet_sendmsg, sock, msg,
+ msg_data_left(msg));
BUG_ON(ret == -EIOCBQUEUED);
return ret;
}
+/**
+ * sock_sendmsg - send a message through @sock
+ * @sock: socket
+ * @msg: message to send
+ *
+ * Sends @msg through @sock, passing through LSM.
+ * Returns the number of bytes sent, or an error code.
+ */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
int err = security_socket_sendmsg(sock, msg,
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
+INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
+ size_t, int));
+INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
+ size_t, int));
+static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
+ int flags)
+{
+ return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
+ inet_recvmsg, sock, msg, msg_data_left(msg),
+ flags);
+}
+
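INDIRECT_CALL_INET() exists to avoid a retpolined indirect call on the hot send/receive paths: when the ops pointer matches one of the expected INET handlers, the compiler can emit a direct call. Roughly what the use in sock_recvmsg_nosec() boils down to when IPv6 is built in (simplified; the real macro is defined alongside INDIRECT_CALL_1()/INDIRECT_CALL_2() in the indirect_call_wrapper.h header):

/* Illustration only, not the literal macro expansion. */
static inline int example_recvmsg_expanded(struct socket *sock,
					   struct msghdr *msg, int flags)
{
	size_t left = msg_data_left(msg);

	if (sock->ops->recvmsg == inet6_recvmsg)
		return inet6_recvmsg(sock, msg, left, flags);
	if (sock->ops->recvmsg == inet_recvmsg)
		return inet_recvmsg(sock, msg, left, flags);
	return sock->ops->recvmsg(sock, msg, left, flags);	/* indirect */
}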
/**
* sock_recvmsg - receive a message from @sock
* @sock: socket
* Receives @msg from @sock, passing through LSM. Returns the total number
* of bytes received, or an error.
*/
-INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
- size_t , int ));
-static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
- int flags)
-{
- return INDIRECT_CALL_INET4(sock->ops->recvmsg, inet_recvmsg, sock, msg,
- msg_data_left(msg), flags);
-}
-
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
{
struct socket *sock = filp->private_data;
struct sock *sk = sock->sk;
- struct socket_wq *wq;
+ struct socket_wq *wq = &sock->wq;
if (sk == NULL)
return -EINVAL;
lock_sock(sk);
- wq = sock->wq;
fasync_helper(fd, filp, on, &wq->fasync_list);
if (!wq->fasync_list)
static int __sys_setsockopt(int fd, int level, int optname,
char __user *optval, int optlen)
{
+ mm_segment_t oldfs = get_fs();
+ char *kernel_optval = NULL;
int err, fput_needed;
struct socket *sock;
if (err)
goto out_put;
+ err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
+ &optname, optval, &optlen,
+ &kernel_optval);
+
+ if (err < 0) {
+ goto out_put;
+ } else if (err > 0) {
+ err = 0;
+ goto out_put;
+ }
+
+ if (kernel_optval) {
+ set_fs(KERNEL_DS);
+ optval = (char __user __force *)kernel_optval;
+ }
+
if (level == SOL_SOCKET)
err =
sock_setsockopt(sock, level, optname, optval,
err =
sock->ops->setsockopt(sock, level, optname, optval,
optlen);
+
+ if (kernel_optval) {
+ set_fs(oldfs);
+ kfree(kernel_optval);
+ }
out_put:
fput_light(sock->file, fput_needed);
}
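The hook added above lets cgroup-attached BPF programs observe or rewrite the option before the kernel handler runs: a negative return from the macro aborts the syscall, a positive return means the program handled the option entirely and the kernel path is bypassed, and a rewritten value arrives as a kernel pointer, which is what the temporary KERNEL_DS window is for. For illustration only, a minimal cgroup sockopt program that lets everything through; header locations and section naming follow contemporary libbpf conventions and may differ by version:

/* Hypothetical cgroup/setsockopt program: return 1 to let the kernel
 * proceed normally, 0 to reject the call with EPERM.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/setsockopt")
int allow_all_setsockopt(struct bpf_sockopt *ctx)
{
	return 1;
}

char _license[] SEC("license") = "GPL";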
{
int err, fput_needed;
struct socket *sock;
+ int max_optlen;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) {
if (err)
goto out_put;
+ max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
+
if (level == SOL_SOCKET)
err =
sock_getsockopt(sock, level, optname, optval,
err =
sock->ops->getsockopt(sock, level, optname, optval,
optlen);
+
+ err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
+ optval, optlen,
+ max_optlen, err);
out_put:
fput_light(sock->file, fput_needed);
}
kmsg->msg_iocb = NULL;
- return import_iovec(save_addr ? READ : WRITE,
+ err = import_iovec(save_addr ? READ : WRITE,
msg.msg_iov, msg.msg_iovlen,
UIO_FASTIOV, iov, &kmsg->msg_iter);
+ return err < 0 ? err : 0;
}
static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
/*
* BSD sendmsg interface
*/
+long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
+ unsigned int flags)
+{
+ struct msghdr msg_sys;
+
+ return ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
+}
long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
bool forbid_cmsg_compat)
* BSD recvmsg interface
*/
+long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
+ unsigned int flags)
+{
+ struct msghdr msg_sys;
+
+ return ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
+}
+
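These *_sock variants exist for callers that already hold a struct socket and must not re-resolve a file descriptor; in this series the intended users are the io_uring send/receive paths. A hypothetical in-kernel caller might look like the following, assuming the era's two-argument sock_from_file() to get from a held struct file to its socket:

/* Sketch of a hypothetical caller that already owns a file reference. */
static long example_sendmsg_on_file(struct file *file,
				    struct user_msghdr __user *umsg,
				    unsigned int flags)
{
	int err;
	struct socket *sock = sock_from_file(file, &err);

	if (!sock)
		return err;
	return __sys_sendmsg_sock(sock, umsg, flags);
}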
long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
bool forbid_cmsg_compat)
{
+// SPDX-License-Identifier: GPL-2.0-only
/*
* net/sunrpc/rpc_pipe.c
*
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mount.h>
+ #include <linux/fs_context.h>
#include <linux/namei.h>
#include <linux/fsnotify.h>
#include <linux/kernel.h>
dget(dentry);
ret = simple_rmdir(dir, dentry);
+ if (!ret)
+ fsnotify_rmdir(dir, dentry);
d_delete(dentry);
dput(dentry);
return ret;
dget(dentry);
ret = simple_unlink(dir, dentry);
+ if (!ret)
+ fsnotify_unlink(dir, dentry);
d_delete(dentry);
dput(dentry);
return ret;
}
static int
- rpc_fill_super(struct super_block *sb, void *data, int silent)
+ rpc_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
struct dentry *root, *gssd_dentry;
- struct net *net = get_net(sb->s_fs_info);
+ struct net *net = sb->s_fs_info;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int err;
}
EXPORT_SYMBOL_GPL(gssd_running);
- static struct dentry *
- rpc_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int rpc_fs_get_tree(struct fs_context *fc)
+ {
+ fc->s_fs_info = get_net(fc->net_ns);
+ return vfs_get_super(fc, vfs_get_keyed_super, rpc_fill_super);
+ }
+
+ static void rpc_fs_free_fc(struct fs_context *fc)
{
- struct net *net = current->nsproxy->net_ns;
- return mount_ns(fs_type, flags, data, net, net->user_ns, rpc_fill_super);
+ if (fc->s_fs_info)
+ put_net(fc->s_fs_info);
+ }
+
+ static const struct fs_context_operations rpc_fs_context_ops = {
+ .free = rpc_fs_free_fc,
+ .get_tree = rpc_fs_get_tree,
+ };
+
+ static int rpc_init_fs_context(struct fs_context *fc)
+ {
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(fc->net_ns->user_ns);
+ fc->ops = &rpc_fs_context_ops;
+ return 0;
}
static void rpc_kill_sb(struct super_block *sb)
static struct file_system_type rpc_pipe_fs_type = {
.owner = THIS_MODULE,
.name = "rpc_pipefs",
- .mount = rpc_mount,
+ .init_fs_context = rpc_init_fs_context,
.kill_sb = rpc_kill_sb,
};
MODULE_ALIAS_FS("rpc_pipefs");
+// SPDX-License-Identifier: GPL-2.0-only
/*
* AppArmor security module
*
*
* Copyright (C) 1998-2008 Novell/SUSE
* Copyright 2009-2010 Canonical Ltd.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, version 2 of the
- * License.
*/
#include <linux/ctype.h>
#include <linux/capability.h>
#include <linux/rcupdate.h>
#include <linux/fs.h>
+ #include <linux/fs_context.h>
#include <linux/poll.h>
#include <uapi/linux/major.h>
#include <uapi/linux/magic.h>
.show_path = aafs_show_path,
};
- static int fill_super(struct super_block *sb, void *data, int silent)
+ static int apparmorfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
static struct tree_descr files[] = { {""} };
int error;
return 0;
}
- static struct dentry *aafs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int apparmorfs_get_tree(struct fs_context *fc)
{
- return mount_single(fs_type, flags, data, fill_super);
+ return get_tree_single(fc, apparmorfs_fill_super);
+ }
+
+ static const struct fs_context_operations apparmorfs_context_ops = {
+ .get_tree = apparmorfs_get_tree,
+ };
+
+ static int apparmorfs_init_fs_context(struct fs_context *fc)
+ {
+ fc->ops = &apparmorfs_context_ops;
+ return 0;
}
static struct file_system_type aafs_ops = {
.owner = THIS_MODULE,
.name = AAFS_NAME,
- .mount = aafs_mount,
+ .init_fs_context = apparmorfs_init_fs_context,
.kill_sb = kill_anon_super,
};
+// SPDX-License-Identifier: GPL-2.0-only
/*
* inode.c - securityfs
*
* Copyright (C) 2005 Greg Kroah-Hartman <gregkh@suse.de>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
* Based on fs/debugfs/inode.c which had the following copyright notice:
* Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
* Copyright (C) 2004 IBM Inc.
#include <linux/sysfs.h>
#include <linux/kobject.h>
#include <linux/fs.h>
+ #include <linux/fs_context.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/init.h>
.free_inode = securityfs_free_inode,
};
- static int fill_super(struct super_block *sb, void *data, int silent)
+ static int securityfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
static const struct tree_descr files[] = {{""}};
int error;
return 0;
}
- static struct dentry *get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data)
+ static int securityfs_get_tree(struct fs_context *fc)
{
- return mount_single(fs_type, flags, data, fill_super);
+ return get_tree_single(fc, securityfs_fill_super);
+ }
+
+ static const struct fs_context_operations securityfs_context_ops = {
+ .get_tree = securityfs_get_tree,
+ };
+
+ static int securityfs_init_fs_context(struct fs_context *fc)
+ {
+ fc->ops = &securityfs_context_ops;
+ return 0;
}
static struct file_system_type fs_type = {
.owner = THIS_MODULE,
.name = "securityfs",
- .mount = get_sb,
+ .init_fs_context = securityfs_init_fs_context,
.kill_sb = kill_litter_super,
};
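The conversion changes only how the securityfs superblock is instantiated; consumers keep populating it through the same creation helpers, which report failure via ERR_PTR values rather than NULL. A hypothetical consumer, for illustration:

/* Hypothetical securityfs consumer; all names here are illustrative. */
static ssize_t example_status_read(struct file *file, char __user *buf,
				   size_t count, loff_t *ppos)
{
	static const char msg[] = "ok\n";

	return simple_read_from_buffer(buf, count, ppos, msg, sizeof(msg) - 1);
}

static const struct file_operations example_status_fops = {
	.read = example_status_read,
};

static struct dentry *example_dir;

static int __init example_securityfs_init(void)
{
	struct dentry *file;

	example_dir = securityfs_create_dir("example", NULL);
	if (IS_ERR(example_dir))
		return PTR_ERR(example_dir);

	file = securityfs_create_file("status", 0444, example_dir, NULL,
				      &example_status_fops);
	if (IS_ERR(file)) {
		securityfs_remove(example_dir);
		return PTR_ERR(file);
	}
	return 0;
}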
+// SPDX-License-Identifier: GPL-2.0-only
/* Updated: Karl MacMillan <kmacmillan@tresys.com>
*
* Added conditional policy language extensions
* Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
* Copyright (C) 2003 - 2004 Tresys Technology, LLC
* Copyright (C) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 2.
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
+ #include <linux/fs_context.h>
#include <linux/mount.h>
#include <linux/mutex.h>
#include <linux/init.h>
selnl_notify_setenforce(new_value);
selinux_status_update_setenforce(state, new_value);
if (!new_value)
- call_lsm_notifier(LSM_POLICY_CHANGE, NULL);
+ call_blocking_lsm_notifier(LSM_POLICY_CHANGE, NULL);
}
length = count;
out:
#define NULL_FILE_NAME "null"
- static int sel_fill_super(struct super_block *sb, void *data, int silent)
+ static int sel_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct selinux_fs_info *fsi;
int ret;
return ret;
}
- static struct dentry *sel_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int sel_get_tree(struct fs_context *fc)
{
- return mount_single(fs_type, flags, data, sel_fill_super);
+ return get_tree_single(fc, sel_fill_super);
+ }
+
+ static const struct fs_context_operations sel_context_ops = {
+ .get_tree = sel_get_tree,
+ };
+
+ static int sel_init_fs_context(struct fs_context *fc)
+ {
+ fc->ops = &sel_context_ops;
+ return 0;
}
static void sel_kill_sb(struct super_block *sb)
static struct file_system_type sel_fs_type = {
.name = "selinuxfs",
- .mount = sel_mount,
+ .init_fs_context = sel_init_fs_context,
.kill_sb = sel_kill_sb,
};
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2007 Casey Schaufler <casey@schaufler-ca.com>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 2.
- *
* Authors:
* Casey Schaufler <casey@schaufler-ca.com>
* Ahmed S. Darwish <darwish.07@gmail.com>
*
* Karl MacMillan <kmacmillan@tresys.com>
* James Morris <jmorris@redhat.com>
- *
*/
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/audit.h>
#include <linux/magic.h>
+ #include <linux/fs_context.h>
#include "smack.h"
#define BEBITS (sizeof(__be32) * 8)
/**
* smk_fill_super - fill the smackfs superblock
* @sb: the empty superblock
- * @data: unused
- * @silent: unused
+ * @fc: unused
*
* Fill in the well known entries for the smack filesystem
*
* Returns 0 on success, an error code on failure
*/
- static int smk_fill_super(struct super_block *sb, void *data, int silent)
+ static int smk_fill_super(struct super_block *sb, struct fs_context *fc)
{
int rc;
}
/**
- * smk_mount - get the smackfs superblock
- * @fs_type: passed along without comment
- * @flags: passed along without comment
- * @dev_name: passed along without comment
- * @data: passed along without comment
+ * smk_get_tree - get the smackfs superblock
+ * @fc: The mount context, including any options
*
* Just passes everything along.
*
* Returns what the lower level code does.
*/
- static struct dentry *smk_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ static int smk_get_tree(struct fs_context *fc)
{
- return mount_single(fs_type, flags, data, smk_fill_super);
+ return get_tree_single(fc, smk_fill_super);
+ }
+
+ static const struct fs_context_operations smk_context_ops = {
+ .get_tree = smk_get_tree,
+ };
+
+ /**
+ * smk_init_fs_context - Initialise a filesystem context for smackfs
+ * @fc: The blank mount context
+ */
+ static int smk_init_fs_context(struct fs_context *fc)
+ {
+ fc->ops = &smk_context_ops;
+ return 0;
}
static struct file_system_type smk_fs_type = {
.name = "smackfs",
- .mount = smk_mount,
+ .init_fs_context = smk_init_fs_context,
.kill_sb = kill_litter_super,
};