scsi: sd: Use the block layer zone append emulation
authorDamien Le Moal <dlemoal@kernel.org>
Mon, 8 Apr 2024 01:41:13 +0000 (10:41 +0900)
committerJens Axboe <axboe@kernel.dk>
Wed, 17 Apr 2024 14:44:03 +0000 (08:44 -0600)
Set the request queue of a TYPE_ZBC device as needing zone append
emulation by setting the device queue max_zone_append_sectors limit to
0. This enables the block layer generic implementation provided by zone
write plugging. With this, the sd driver will never see a
REQ_OP_ZONE_APPEND request and the zone append emulation code
implemented in sd_zbc.c can be removed.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Hans Holmberg <hans.holmberg@wdc.com>
Tested-by: Dennis Maisenbacher <dennis.maisenbacher@wdc.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240408014128.205141-14-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/scsi/sd.c
drivers/scsi/sd.h
drivers/scsi/sd_zbc.c

index 3cf89867029044ede7b2f6a844dca974b8d50e32..dcba9530ffa5269e6321f0bacb244524e6be636d 100644 (file)
@@ -1260,12 +1260,6 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
                }
        }
 
-       if (req_op(rq) == REQ_OP_ZONE_APPEND) {
-               ret = sd_zbc_prepare_zone_append(cmd, &lba, nr_blocks);
-               if (ret)
-                       goto fail;
-       }
-
        fua = rq->cmd_flags & REQ_FUA ? 0x8 : 0;
        dix = scsi_prot_sg_count(cmd);
        dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
@@ -1348,7 +1342,6 @@ static blk_status_t sd_init_command(struct scsi_cmnd *cmd)
                return sd_setup_flush_cmnd(cmd);
        case REQ_OP_READ:
        case REQ_OP_WRITE:
-       case REQ_OP_ZONE_APPEND:
                return sd_setup_read_write_cmnd(cmd);
        case REQ_OP_ZONE_RESET:
                return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER,
@@ -3979,7 +3972,6 @@ static void scsi_disk_release(struct device *dev)
        struct scsi_disk *sdkp = to_scsi_disk(dev);
 
        ida_free(&sd_index_ida, sdkp->index);
-       sd_zbc_free_zone_info(sdkp);
        put_device(&sdkp->device->sdev_gendev);
        free_opal_dev(sdkp->opal_dev);
 
index 5c4285a582b220367d9eda5fabc4ca5115cc919d..49dd600bfa48252fc873884fd140209f705ffc4d 100644 (file)
@@ -104,12 +104,6 @@ struct scsi_disk {
         * between zone starting LBAs is constant.
         */
        u32             zone_starting_lba_gran;
-       u32             *zones_wp_offset;
-       spinlock_t      zones_wp_offset_lock;
-       u32             *rev_wp_offset;
-       struct mutex    rev_mutex;
-       struct work_struct zone_wp_offset_work;
-       char            *zone_wp_update_buf;
 #endif
        atomic_t        openers;
        sector_t        capacity;       /* size in logical blocks */
@@ -245,7 +239,6 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
 
 #ifdef CONFIG_BLK_DEV_ZONED
 
-void sd_zbc_free_zone_info(struct scsi_disk *sdkp);
 int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]);
 int sd_zbc_revalidate_zones(struct scsi_disk *sdkp);
 blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
@@ -255,13 +248,8 @@ unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
 int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
                unsigned int nr_zones, report_zones_cb cb, void *data);
 
-blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
-                                       unsigned int nr_blocks);
-
 #else /* CONFIG_BLK_DEV_ZONED */
 
-static inline void sd_zbc_free_zone_info(struct scsi_disk *sdkp) {}
-
 static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
 {
        return 0;
@@ -285,13 +273,6 @@ static inline unsigned int sd_zbc_complete(struct scsi_cmnd *cmd,
        return good_bytes;
 }
 
-static inline blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd,
-                                                     sector_t *lba,
-                                                     unsigned int nr_blocks)
-{
-       return BLK_STS_TARGET;
-}
-
 #define sd_zbc_report_zones NULL
 
 #endif /* CONFIG_BLK_DEV_ZONED */
index 26af5ab7d7c173cc4553162e5afaa814d8f181e0..d0ead98589544ec51d54532bb8b0c8fd2257dd4d 100644 (file)
 #define CREATE_TRACE_POINTS
 #include "sd_trace.h"
 
-/**
- * sd_zbc_get_zone_wp_offset - Get zone write pointer offset.
- * @zone: Zone for which to return the write pointer offset.
- *
- * Return: offset of the write pointer from the start of the zone.
- */
-static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
-{
-       if (zone->type == ZBC_ZONE_TYPE_CONV)
-               return 0;
-
-       switch (zone->cond) {
-       case BLK_ZONE_COND_IMP_OPEN:
-       case BLK_ZONE_COND_EXP_OPEN:
-       case BLK_ZONE_COND_CLOSED:
-               return zone->wp - zone->start;
-       case BLK_ZONE_COND_FULL:
-               return zone->len;
-       case BLK_ZONE_COND_EMPTY:
-       case BLK_ZONE_COND_OFFLINE:
-       case BLK_ZONE_COND_READONLY:
-       default:
-               /*
-                * Offline and read-only zones do not have a valid
-                * write pointer. Use 0 as for an empty zone.
-                */
-               return 0;
-       }
-}
-
 /* Whether or not a SCSI zone descriptor describes a gap zone. */
 static bool sd_zbc_is_gap_zone(const u8 buf[64])
 {
@@ -121,9 +91,6 @@ static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64],
        if (ret)
                return ret;
 
-       if (sdkp->rev_wp_offset)
-               sdkp->rev_wp_offset[idx] = sd_zbc_get_zone_wp_offset(&zone);
-
        return 0;
 }
 
@@ -347,123 +314,6 @@ static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
        return BLK_STS_OK;
 }
 
-#define SD_ZBC_INVALID_WP_OFST (~0u)
-#define SD_ZBC_UPDATING_WP_OFST        (SD_ZBC_INVALID_WP_OFST - 1)
-
-static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
-                                   void *data)
-{
-       struct scsi_disk *sdkp = data;
-
-       lockdep_assert_held(&sdkp->zones_wp_offset_lock);
-
-       sdkp->zones_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone);
-
-       return 0;
-}
-
-/*
- * An attempt to append a zone triggered an invalid write pointer error.
- * Reread the write pointer of the zone(s) in which the append failed.
- */
-static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
-{
-       struct scsi_disk *sdkp;
-       unsigned long flags;
-       sector_t zno;
-       int ret;
-
-       sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work);
-
-       spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
-       for (zno = 0; zno < sdkp->zone_info.nr_zones; zno++) {
-               if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
-                       continue;
-
-               spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
-               ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf,
-                                            SD_BUF_SIZE,
-                                            zno * sdkp->zone_info.zone_blocks, true);
-               spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
-               if (!ret)
-                       sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64,
-                                           zno, sd_zbc_update_wp_offset_cb,
-                                           sdkp);
-       }
-       spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
-
-       scsi_device_put(sdkp->device);
-}
-
-/**
- * sd_zbc_prepare_zone_append() - Prepare an emulated ZONE_APPEND command.
- * @cmd: the command to setup
- * @lba: the LBA to patch
- * @nr_blocks: the number of LBAs to be written
- *
- * Called from sd_setup_read_write_cmnd() for REQ_OP_ZONE_APPEND.
- * @sd_zbc_prepare_zone_append() handles the necessary zone wrote locking and
- * patching of the lba for an emulated ZONE_APPEND command.
- *
- * In case the cached write pointer offset is %SD_ZBC_INVALID_WP_OFST it will
- * schedule a REPORT ZONES command and return BLK_STS_IOERR.
- */
-blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
-                                       unsigned int nr_blocks)
-{
-       struct request *rq = scsi_cmd_to_rq(cmd);
-       struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
-       unsigned int wp_offset, zno = blk_rq_zone_no(rq);
-       unsigned long flags;
-       blk_status_t ret;
-
-       ret = sd_zbc_cmnd_checks(cmd);
-       if (ret != BLK_STS_OK)
-               return ret;
-
-       if (!blk_rq_zone_is_seq(rq))
-               return BLK_STS_IOERR;
-
-       /* Unlock of the write lock will happen in sd_zbc_complete() */
-       if (!blk_req_zone_write_trylock(rq))
-               return BLK_STS_ZONE_RESOURCE;
-
-       spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
-       wp_offset = sdkp->zones_wp_offset[zno];
-       switch (wp_offset) {
-       case SD_ZBC_INVALID_WP_OFST:
-               /*
-                * We are about to schedule work to update a zone write pointer
-                * offset, which will cause the zone append command to be
-                * requeued. So make sure that the scsi device does not go away
-                * while the work is being processed.
-                */
-               if (scsi_device_get(sdkp->device)) {
-                       ret = BLK_STS_IOERR;
-                       break;
-               }
-               sdkp->zones_wp_offset[zno] = SD_ZBC_UPDATING_WP_OFST;
-               schedule_work(&sdkp->zone_wp_offset_work);
-               fallthrough;
-       case SD_ZBC_UPDATING_WP_OFST:
-               ret = BLK_STS_DEV_RESOURCE;
-               break;
-       default:
-               wp_offset = sectors_to_logical(sdkp->device, wp_offset);
-               if (wp_offset + nr_blocks > sdkp->zone_info.zone_blocks) {
-                       ret = BLK_STS_IOERR;
-                       break;
-               }
-
-               trace_scsi_prepare_zone_append(cmd, *lba, wp_offset);
-               *lba += wp_offset;
-       }
-       spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
-       if (ret)
-               blk_req_zone_write_unlock(rq);
-       return ret;
-}
-
 /**
  * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
  *                     can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
@@ -504,96 +354,6 @@ blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
        return BLK_STS_OK;
 }
 
-static bool sd_zbc_need_zone_wp_update(struct request *rq)
-{
-       switch (req_op(rq)) {
-       case REQ_OP_ZONE_APPEND:
-       case REQ_OP_ZONE_FINISH:
-       case REQ_OP_ZONE_RESET:
-       case REQ_OP_ZONE_RESET_ALL:
-               return true;
-       case REQ_OP_WRITE:
-       case REQ_OP_WRITE_ZEROES:
-               return blk_rq_zone_is_seq(rq);
-       default:
-               return false;
-       }
-}
-
-/**
- * sd_zbc_zone_wp_update - Update cached zone write pointer upon cmd completion
- * @cmd: Completed command
- * @good_bytes: Command reply bytes
- *
- * Called from sd_zbc_complete() to handle the update of the cached zone write
- * pointer value in case an update is needed.
- */
-static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
-                                         unsigned int good_bytes)
-{
-       int result = cmd->result;
-       struct request *rq = scsi_cmd_to_rq(cmd);
-       struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
-       unsigned int zno = blk_rq_zone_no(rq);
-       enum req_op op = req_op(rq);
-       unsigned long flags;
-
-       /*
-        * If we got an error for a command that needs updating the write
-        * pointer offset cache, we must mark the zone wp offset entry as
-        * invalid to force an update from disk the next time a zone append
-        * command is issued.
-        */
-       spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
-
-       if (result && op != REQ_OP_ZONE_RESET_ALL) {
-               if (op == REQ_OP_ZONE_APPEND) {
-                       /* Force complete completion (no retry) */
-                       good_bytes = 0;
-                       scsi_set_resid(cmd, blk_rq_bytes(rq));
-               }
-
-               /*
-                * Force an update of the zone write pointer offset on
-                * the next zone append access.
-                */
-               if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
-                       sdkp->zones_wp_offset[zno] = SD_ZBC_INVALID_WP_OFST;
-               goto unlock_wp_offset;
-       }
-
-       switch (op) {
-       case REQ_OP_ZONE_APPEND:
-               trace_scsi_zone_wp_update(cmd, rq->__sector,
-                                 sdkp->zones_wp_offset[zno], good_bytes);
-               rq->__sector += sdkp->zones_wp_offset[zno];
-               fallthrough;
-       case REQ_OP_WRITE_ZEROES:
-       case REQ_OP_WRITE:
-               if (sdkp->zones_wp_offset[zno] < sd_zbc_zone_sectors(sdkp))
-                       sdkp->zones_wp_offset[zno] +=
-                                               good_bytes >> SECTOR_SHIFT;
-               break;
-       case REQ_OP_ZONE_RESET:
-               sdkp->zones_wp_offset[zno] = 0;
-               break;
-       case REQ_OP_ZONE_FINISH:
-               sdkp->zones_wp_offset[zno] = sd_zbc_zone_sectors(sdkp);
-               break;
-       case REQ_OP_ZONE_RESET_ALL:
-               memset(sdkp->zones_wp_offset, 0,
-                      sdkp->zone_info.nr_zones * sizeof(unsigned int));
-               break;
-       default:
-               break;
-       }
-
-unlock_wp_offset:
-       spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
-
-       return good_bytes;
-}
-
 /**
  * sd_zbc_complete - ZBC command post processing.
  * @cmd: Completed command
@@ -619,11 +379,7 @@ unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
                 * so be quiet about the error.
                 */
                rq->rq_flags |= RQF_QUIET;
-       } else if (sd_zbc_need_zone_wp_update(rq))
-               good_bytes = sd_zbc_zone_wp_update(cmd, good_bytes);
-
-       if (req_op(rq) == REQ_OP_ZONE_APPEND)
-               blk_req_zone_write_unlock(rq);
+       }
 
        return good_bytes;
 }
@@ -780,46 +536,6 @@ static void sd_zbc_print_zones(struct scsi_disk *sdkp)
                          sdkp->zone_info.zone_blocks);
 }
 
-static int sd_zbc_init_disk(struct scsi_disk *sdkp)
-{
-       sdkp->zones_wp_offset = NULL;
-       spin_lock_init(&sdkp->zones_wp_offset_lock);
-       sdkp->rev_wp_offset = NULL;
-       mutex_init(&sdkp->rev_mutex);
-       INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn);
-       sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL);
-       if (!sdkp->zone_wp_update_buf)
-               return -ENOMEM;
-
-       return 0;
-}
-
-void sd_zbc_free_zone_info(struct scsi_disk *sdkp)
-{
-       if (!sdkp->zone_wp_update_buf)
-               return;
-
-       /* Serialize against revalidate zones */
-       mutex_lock(&sdkp->rev_mutex);
-
-       kvfree(sdkp->zones_wp_offset);
-       sdkp->zones_wp_offset = NULL;
-       kfree(sdkp->zone_wp_update_buf);
-       sdkp->zone_wp_update_buf = NULL;
-
-       sdkp->early_zone_info = (struct zoned_disk_info){ };
-       sdkp->zone_info = (struct zoned_disk_info){ };
-
-       mutex_unlock(&sdkp->rev_mutex);
-}
-
-static void sd_zbc_revalidate_zones_cb(struct gendisk *disk)
-{
-       struct scsi_disk *sdkp = scsi_disk(disk);
-
-       swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset);
-}
-
 /*
  * Call blk_revalidate_disk_zones() if any of the zoned disk properties have
  * changed that make it necessary to call that function. Called by
@@ -831,18 +547,8 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
        struct request_queue *q = disk->queue;
        u32 zone_blocks = sdkp->early_zone_info.zone_blocks;
        unsigned int nr_zones = sdkp->early_zone_info.nr_zones;
-       int ret = 0;
        unsigned int flags;
-
-       /*
-        * For all zoned disks, initialize zone append emulation data if not
-        * already done.
-        */
-       if (sd_is_zoned(sdkp) && !sdkp->zone_wp_update_buf) {
-               ret = sd_zbc_init_disk(sdkp);
-               if (ret)
-                       return ret;
-       }
+       int ret;
 
        /*
         * There is nothing to do for regular disks, including host-aware disks
@@ -851,50 +557,32 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
        if (!blk_queue_is_zoned(q))
                return 0;
 
-       /*
-        * Make sure revalidate zones are serialized to ensure exclusive
-        * updates of the scsi disk data.
-        */
-       mutex_lock(&sdkp->rev_mutex);
-
        if (sdkp->zone_info.zone_blocks == zone_blocks &&
            sdkp->zone_info.nr_zones == nr_zones &&
            disk->nr_zones == nr_zones)
-               goto unlock;
+               return 0;
 
-       flags = memalloc_noio_save();
        sdkp->zone_info.zone_blocks = zone_blocks;
        sdkp->zone_info.nr_zones = nr_zones;
-       sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL);
-       if (!sdkp->rev_wp_offset) {
-               ret = -ENOMEM;
-               memalloc_noio_restore(flags);
-               goto unlock;
-       }
 
        blk_queue_chunk_sectors(q,
                        logical_to_sectors(sdkp->device, zone_blocks));
-       blk_queue_max_zone_append_sectors(q,
-                       q->limits.max_segments << PAGE_SECTORS_SHIFT);
 
-       ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
+       /* Enable block layer zone append emulation */
+       blk_queue_max_zone_append_sectors(q, 0);
 
+       flags = memalloc_noio_save();
+       ret = blk_revalidate_disk_zones(disk, NULL);
        memalloc_noio_restore(flags);
-       kvfree(sdkp->rev_wp_offset);
-       sdkp->rev_wp_offset = NULL;
-
        if (ret) {
                sdkp->zone_info = (struct zoned_disk_info){ };
                sdkp->capacity = 0;
-               goto unlock;
+               return ret;
        }
 
        sd_zbc_print_zones(sdkp);
 
-unlock:
-       mutex_unlock(&sdkp->rev_mutex);
-
-       return ret;
+       return 0;
 }
 
 /**
@@ -917,10 +605,8 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
        if (!sd_is_zoned(sdkp)) {
                /*
                 * Device managed or normal SCSI disk, no special handling
-                * required. Nevertheless, free the disk zone information in
-                * case the device type changed.
+                * required.
                 */
-               sd_zbc_free_zone_info(sdkp);
                return 0;
        }
 
@@ -941,7 +627,6 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
 
        /* The drive satisfies the kernel restrictions: set it up */
        blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
-       blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
        if (sdkp->zones_max_open == U32_MAX)
                disk_set_max_open_zones(disk, 0);
        else