struct pgpath *current_pgpath;
struct priority_group *current_pg;
struct priority_group *next_pg; /* Switch to this PG if set */
+ struct priority_group *last_probed_pg; /* PG checked by the last path probe */
atomic_t nr_valid_paths; /* Total number of usable paths */
unsigned int nr_priority_groups;
const char *hw_handler_name;
char *hw_handler_params;
wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
+ wait_queue_head_t probe_wait; /* Wait for probing paths */
unsigned int pg_init_retries; /* Number of times to retry pg_init */
unsigned int pg_init_delay_msecs; /* Number of msecs before pg_init retry */
atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */
struct bio_list queued_bios;
struct timer_list nopath_timer; /* Timeout for queue_if_no_path */
+ bool is_suspending; /* Set from presuspend until resume */
};
/*
#define MPATHF_PG_INIT_DISABLED 4 /* pg_init is not currently allowed */
#define MPATHF_PG_INIT_REQUIRED 5 /* pg_init needs calling? */
#define MPATHF_PG_INIT_DELAY_RETRY 6 /* Delay pg_init retry? */
+#define MPATHF_DELAY_PG_SWITCH 7 /* Delay switching pg if it still has paths */
+#define MPATHF_NEED_PG_SWITCH 8 /* Need to switch pgs after the delay has ended */
static bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m)
{
atomic_set(&m->pg_init_count, 0);
m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
init_waitqueue_head(&m->pg_init_wait);
+ init_waitqueue_head(&m->probe_wait);
return 0;
}
goto failed;
}
+ /* Don't change PG until it has no remaining paths */
+ pg = READ_ONCE(m->current_pg);
+ if (pg) {
+ pgpath = choose_path_in_pg(m, pg, nr_bytes);
+ if (!IS_ERR_OR_NULL(pgpath))
+ return pgpath;
+ }
+
/* Were we instructed to switch PG? */
if (READ_ONCE(m->next_pg)) {
spin_lock_irqsave(&m->lock, flags);
pg = m->next_pg;
if (!pg) {
spin_unlock_irqrestore(&m->lock, flags);
- goto check_current_pg;
+ goto check_all_pgs;
}
m->next_pg = NULL;
spin_unlock_irqrestore(&m->lock, flags);
pgpath = choose_path_in_pg(m, pg, nr_bytes);
if (!IS_ERR_OR_NULL(pgpath))
return pgpath;
}
-
- /* Don't change PG until it has no remaining paths */
-check_current_pg:
- pg = READ_ONCE(m->current_pg);
- if (pg) {
- pgpath = choose_path_in_pg(m, pg, nr_bytes);
- if (!IS_ERR_OR_NULL(pgpath))
- return pgpath;
- }
-
+check_all_pgs:
/*
* Loop through priority groups until we find a valid path.
* First time we skip PGs marked 'bypassed'.
*/
/*
* Temporarily try to avoid having to use the specified PG
*/
static void bypass_pg(struct multipath *m, struct priority_group *pg,
- bool bypassed)
+ bool bypassed, bool can_be_delayed)
{
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
pg->bypassed = bypassed;
- m->current_pgpath = NULL;
- m->current_pg = NULL;
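+ /* If a path probe is in progress, defer switching away from the current PG */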
+ if (can_be_delayed && test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags)) {
+ set_bit(MPATHF_NEED_PG_SWITCH, &m->flags);
+ } else {
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
spin_unlock_irqrestore(&m->lock, flags);
if (--pgnum)
continue;
- m->current_pgpath = NULL;
- m->current_pg = NULL;
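+ /* Hold the switch until the in-progress probe completes */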
+ if (test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags)) {
+ set_bit(MPATHF_NEED_PG_SWITCH, &m->flags);
+ } else {
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
m->next_pg = pg;
}
spin_unlock_irqrestore(&m->lock, flags);
break;
}
- bypass_pg(m, pg, bypassed);
+ bypass_pg(m, pg, bypassed, true);
return 0;
}
* Probably doing something like FW upgrade on the
* controller so try the other pg.
*/
- bypass_pg(m, pg, true);
+ bypass_pg(m, pg, true, false);
break;
case SCSI_DH_RETRY:
/* Wait before retrying. */
static void multipath_presuspend(struct dm_target *ti)
{
struct multipath *m = ti->private;
+ unsigned long flags;
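+
+ /* Note the suspend so an in-flight path probe can bail out early */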
+ spin_lock_irqsave(&m->lock, flags);
+ m->is_suspending = true;
+ spin_unlock_irqrestore(&m->lock, flags);
/* FIXME: bio-based shouldn't need to always disable queue_if_no_path */
if (m->queue_mode == DM_TYPE_BIO_BASED || !dm_noflush_suspending(m->ti))
queue_if_no_path(m, false, true, __func__);
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
+ m->is_suspending = false;
if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) {
set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
DMEMIT("%u ", m->nr_priority_groups);
- if (m->next_pg)
- pg_num = m->next_pg->pg_num;
- else if (m->current_pg)
+ if (m->current_pg)
pg_num = m->current_pg->pg_num;
+ else if (m->next_pg)
+ pg_num = m->next_pg->pg_num;
else
pg_num = (m->nr_priority_groups ? 1 : 0);
static int probe_active_paths(struct multipath *m)
{
struct pgpath *pgpath;
- struct priority_group *pg;
+ struct priority_group *pg = NULL;
unsigned long flags;
int r = 0;
- mutex_lock(&m->work_mutex);
-
spin_lock_irqsave(&m->lock, flags);
- if (test_bit(MPATHF_QUEUE_IO, &m->flags))
- pg = NULL;
- else
- pg = m->current_pg;
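+ /* If another probe is already in progress, wait for it to finish */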
+ if (test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags)) {
+ wait_event_lock_irq(m->probe_wait,
+ !test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags),
+ m->lock);
+ /*
+ * If we waited because a probe was already in progress and it
+ * probed the currently active pathgroup, don't reprobe. Just
+ * check below whether any valid paths remain.
+ */
+ if (m->current_pg == m->last_probed_pg)
+ goto skip_probe;
+ }
+ if (!m->current_pg || m->is_suspending ||
+ test_bit(MPATHF_QUEUE_IO, &m->flags))
+ goto skip_probe;
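+ /* Block PG switches while we probe the current PG's paths */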
+ set_bit(MPATHF_DELAY_PG_SWITCH, &m->flags);
+ pg = m->last_probed_pg = m->current_pg;
spin_unlock_irqrestore(&m->lock, flags);
- if (pg) {
- list_for_each_entry(pgpath, &pg->pgpaths, list) {
- if (!pgpath->is_active)
- continue;
+ list_for_each_entry(pgpath, &pg->pgpaths, list) {
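+ /* Stop probing if the active PG changed or a suspend started */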
+ if (pg != READ_ONCE(m->current_pg) ||
+ READ_ONCE(m->is_suspending))
+ goto out;
+ if (!pgpath->is_active)
+ continue;
- r = probe_path(pgpath);
- if (r < 0)
- goto out;
- }
+ r = probe_path(pgpath);
+ if (r < 0)
+ goto out;
}
- if (!atomic_read(&m->nr_valid_paths))
- r = -ENOTCONN;
-
out:
- mutex_unlock(&m->work_mutex);
+ spin_lock_irqsave(&m->lock, flags);
+ clear_bit(MPATHF_DELAY_PG_SWITCH, &m->flags);
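+ /* Re-enable PG switching and apply any switch deferred during the probe */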
+ if (test_and_clear_bit(MPATHF_NEED_PG_SWITCH, &m->flags)) {
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
+skip_probe:
+ if (r == 0 && !atomic_read(&m->nr_valid_paths))
+ r = -ENOTCONN;
+ spin_unlock_irqrestore(&m->lock, flags);
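+ /* Wake any probers waiting for MPATHF_DELAY_PG_SWITCH to clear */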
+ if (pg)
+ wake_up(&m->probe_wait);
return r;
}