btrfs: add read policy to set a preferred device
author Anand Jain <anand.jain@oracle.com>
Wed, 1 Jan 2025 18:06:36 +0000 (02:06 +0800)
committer David Sterba <dsterba@suse.com>
Mon, 13 Jan 2025 13:53:21 +0000 (14:53 +0100)
Add a read policy that forces all reads to go to a given device
(specified by devid) on the RAID1 profiles.

This will be used for testing, e.g. to read from a stale device. Users may
find other use cases.

It can be set via sysfs by writing a value in the format "devid:<devid>" to the file

  /sys/fs/btrfs/FSID/read_policy

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/sysfs.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

index e155b7ce1ee58460f865e190bb8befa45ed29414..5211d13d73f8aa4ddd051b557363ffe78c300215 100644 (file)
@@ -1309,6 +1309,7 @@ static const char *btrfs_read_policy_name[] = {
        "pid",
 #ifdef CONFIG_BTRFS_EXPERIMENTAL
        "round-robin",
+       "devid",
 #endif
 };
 
@@ -1364,8 +1365,11 @@ static ssize_t btrfs_read_policy_show(struct kobject *kobj,
                if (i == BTRFS_READ_POLICY_RR)
                        ret += sysfs_emit_at(buf, ret, ":%u",
                                             READ_ONCE(fs_devices->rr_min_contig_read));
-#endif
 
+               if (i == BTRFS_READ_POLICY_DEVID)
+                       ret += sysfs_emit_at(buf, ret, ":%llu",
+                                            READ_ONCE(fs_devices->read_devid));
+#endif
                if (i == policy)
                        ret += sysfs_emit_at(buf, ret, "]");
        }
@@ -1421,6 +1425,31 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
 
                return len;
        }
+
+       if (index == BTRFS_READ_POLICY_DEVID) {
+               if (value != -1) {
+                       BTRFS_DEV_LOOKUP_ARGS(args);
+
+                       /* Validate input devid. */
+                       args.devid = value;
+                       if (btrfs_find_device(fs_devices, &args) == NULL)
+                               return -EINVAL;
+               } else {
+                       /* Set default devid to the devid of the latest device. */
+                       value = fs_devices->latest_dev->devid;
+               }
+
+               if (index != READ_ONCE(fs_devices->read_policy) ||
+                   value != READ_ONCE(fs_devices->read_devid)) {
+                       WRITE_ONCE(fs_devices->read_policy, index);
+                       WRITE_ONCE(fs_devices->read_devid, value);
+
+                       btrfs_info(fs_devices->fs_info, "read policy set to '%s:%llu'",
+                                  btrfs_read_policy_name[index], value);
+               }
+
+               return len;
+       }
 #endif
        if (index != READ_ONCE(fs_devices->read_policy)) {
                WRITE_ONCE(fs_devices->read_policy, index);
index cfe1d5ada5f248e68be1b6bf86d72be044006782..b5fd1aa45c4c838dbd84fbcd09bb685a0032e62f 100644 (file)
@@ -1331,6 +1331,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
        fs_devices->read_policy = BTRFS_READ_POLICY_PID;
 #ifdef CONFIG_BTRFS_EXPERIMENTAL
        fs_devices->rr_min_contig_read = BTRFS_DEFAULT_RR_MIN_CONTIG_READ;
+       fs_devices->read_devid = latest_dev->devid;
 #endif
 
        return 0;
@@ -5957,6 +5958,19 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
 }
 
 #ifdef CONFIG_BTRFS_EXPERIMENTAL
+static int btrfs_read_preferred(struct btrfs_chunk_map *map, int first, int num_stripes)
+{
+       /* Scan stripes [first, first + num_stripes) for the device named by read_devid. */
+       for (int i = 0; i < num_stripes; i++) {
+               const struct btrfs_device *dev = map->stripes[first + i].dev;
+
+               if (READ_ONCE(dev->fs_devices->read_devid) == dev->devid)
+                       return first + i;
+       }
+       /* None of the scanned stripes is on that device, fall back to the first one. */
+       return first;
+}
+
 struct stripe_mirror {
        u64 devid;
        int num;
@@ -6046,6 +6060,9 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
        case BTRFS_READ_POLICY_RR:
                preferred_mirror = btrfs_read_rr(map, first, num_stripes);
                break;
+       case BTRFS_READ_POLICY_DEVID:
+               preferred_mirror = btrfs_read_preferred(map, first, num_stripes);
+               break;
 #endif
        }
 
index f9fe698a9b4b405e2ae9b3d59c825bf73ee568f3..120f65e21eeb29660ccfb57605229f03a7a30f37 100644 (file)
@@ -309,6 +309,8 @@ enum btrfs_read_policy {
 #ifdef CONFIG_BTRFS_EXPERIMENTAL
        /* Balancing RAID1 reads across all striped devices (round-robin). */
        BTRFS_READ_POLICY_RR,
+       /* Read from a specific device. */
+       BTRFS_READ_POLICY_DEVID,
 #endif
        BTRFS_NR_READ_POLICY,
 };
@@ -446,6 +448,9 @@ struct btrfs_fs_devices {
         */
        u32 rr_min_contig_read;
 
+       /* Device to be used for reading in case of RAID1. */
+       u64 read_devid;
+
        /* Checksum mode - offload it or do it synchronously. */
        enum btrfs_offload_csum_mode offload_csum_mode;
 #endif