[Bionic][PATCH v2 00/21] blk-mq scheduler fixes

[Bionic][PATCH v2 20/21] scsi: virtio_scsi: fix IO hang caused by automatic irq vector affinity

Jose Ricardo Ziviani
From: Ming Lei <[hidden email]>

Since commit 84676c1f21e8ff5 ("genirq/affinity: assign vectors to all
possible CPUs") it is possible to end up in a scenario where only
offline CPUs are mapped to an interrupt vector.

This is only an issue for the legacy I/O path since with blk-mq/scsi-mq
an I/O can't be submitted to a hardware queue if the queue isn't mapped
to an online CPU.

Fix this issue by forcing virtio-scsi to use blk-mq.
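
For reference, the force_blk_mq template flag set below is consumed by the
SCSI core change earlier in this series ("scsi: core: introduce force_blk_mq").
A minimal sketch of that check, simplified for illustration and not part of
this patch:

	/* sketch: scsi_host_alloc() honours the template flag, so a host
	 * that sets force_blk_mq can never fall back to the legacy path
	 */
	shost->use_blk_mq = scsi_use_blk_mq || shost->hostt->force_blk_mq;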

[mkp: commit desc]

Cc: Omar Sandoval <[hidden email]>,
Cc: "Martin K. Petersen" <[hidden email]>,
Cc: James Bottomley <[hidden email]>,
Cc: Christoph Hellwig <[hidden email]>,
Cc: Don Brace <[hidden email]>
Cc: Kashyap Desai <[hidden email]>
Cc: Paolo Bonzini <[hidden email]>
Cc: Mike Snitzer <[hidden email]>
Cc: Laurence Oberman <[hidden email]>
Fixes: 84676c1f21e8 ("genirq/affinity: assign vectors to all possible CPUs")
Signed-off-by: Ming Lei <[hidden email]>
Reviewed-by: Hannes Reinecke <[hidden email]>
Acked-by: Paolo Bonzini <[hidden email]>
Reviewed-by: Christoph Hellwig <[hidden email]>
Signed-off-by: Martin K. Petersen <[hidden email]>
(cherry picked from commit b5b6e8c8d3b4cbeb447a0f10c7d5de3caa573299)
Signed-off-by: Jose Ricardo Ziviani <[hidden email]>
---
 drivers/scsi/virtio_scsi.c | 59 ++------------------------------------
 1 file changed, 3 insertions(+), 56 deletions(-)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 7c28e8d4955a..54e3a0f6844c 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -91,9 +91,6 @@ struct virtio_scsi_vq {
 struct virtio_scsi_target_state {
  seqcount_t tgt_seq;
 
- /* Count of outstanding requests. */
- atomic_t reqs;
-
  /* Currently active virtqueue for requests sent to this target. */
  struct virtio_scsi_vq *req_vq;
 };
@@ -152,8 +149,6 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf)
  struct virtio_scsi_cmd *cmd = buf;
  struct scsi_cmnd *sc = cmd->sc;
  struct virtio_scsi_cmd_resp *resp = &cmd->resp.cmd;
- struct virtio_scsi_target_state *tgt =
- scsi_target(sc->device)->hostdata;
 
  dev_dbg(&sc->device->sdev_gendev,
  "cmd %p response %u status %#02x sense_len %u\n",
@@ -210,8 +205,6 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf)
  }
 
  sc->scsi_done(sc);
-
- atomic_dec(&tgt->reqs);
 }
 
 static void virtscsi_vq_done(struct virtio_scsi *vscsi,
@@ -580,10 +573,7 @@ static int virtscsi_queuecommand_single(struct Scsi_Host *sh,
  struct scsi_cmnd *sc)
 {
  struct virtio_scsi *vscsi = shost_priv(sh);
- struct virtio_scsi_target_state *tgt =
- scsi_target(sc->device)->hostdata;
 
- atomic_inc(&tgt->reqs);
  return virtscsi_queuecommand(vscsi, &vscsi->req_vqs[0], sc);
 }
 
@@ -596,55 +586,11 @@ static struct virtio_scsi_vq *virtscsi_pick_vq_mq(struct virtio_scsi *vscsi,
  return &vscsi->req_vqs[hwq];
 }
 
-static struct virtio_scsi_vq *virtscsi_pick_vq(struct virtio_scsi *vscsi,
-       struct virtio_scsi_target_state *tgt)
-{
- struct virtio_scsi_vq *vq;
- unsigned long flags;
- u32 queue_num;
-
- local_irq_save(flags);
- if (atomic_inc_return(&tgt->reqs) > 1) {
- unsigned long seq;
-
- do {
- seq = read_seqcount_begin(&tgt->tgt_seq);
- vq = tgt->req_vq;
- } while (read_seqcount_retry(&tgt->tgt_seq, seq));
- } else {
- /* no writes can be concurrent because of atomic_t */
- write_seqcount_begin(&tgt->tgt_seq);
-
- /* keep previous req_vq if a reader just arrived */
- if (unlikely(atomic_read(&tgt->reqs) > 1)) {
- vq = tgt->req_vq;
- goto unlock;
- }
-
- queue_num = smp_processor_id();
- while (unlikely(queue_num >= vscsi->num_queues))
- queue_num -= vscsi->num_queues;
- tgt->req_vq = vq = &vscsi->req_vqs[queue_num];
- unlock:
- write_seqcount_end(&tgt->tgt_seq);
- }
- local_irq_restore(flags);
-
- return vq;
-}
-
 static int virtscsi_queuecommand_multi(struct Scsi_Host *sh,
        struct scsi_cmnd *sc)
 {
  struct virtio_scsi *vscsi = shost_priv(sh);
- struct virtio_scsi_target_state *tgt =
- scsi_target(sc->device)->hostdata;
- struct virtio_scsi_vq *req_vq;
-
- if (shost_use_blk_mq(sh))
- req_vq = virtscsi_pick_vq_mq(vscsi, sc);
- else
- req_vq = virtscsi_pick_vq(vscsi, tgt);
+ struct virtio_scsi_vq *req_vq = virtscsi_pick_vq_mq(vscsi, sc);
 
  return virtscsi_queuecommand(vscsi, req_vq, sc);
 }
@@ -775,7 +721,6 @@ static int virtscsi_target_alloc(struct scsi_target *starget)
  return -ENOMEM;
 
  seqcount_init(&tgt->tgt_seq);
- atomic_set(&tgt->reqs, 0);
  tgt->req_vq = &vscsi->req_vqs[0];
 
  starget->hostdata = tgt;
@@ -823,6 +768,7 @@ static struct scsi_host_template virtscsi_host_template_single = {
  .target_alloc = virtscsi_target_alloc,
  .target_destroy = virtscsi_target_destroy,
  .track_queue_depth = 1,
+ .force_blk_mq = 1,
 };
 
 static struct scsi_host_template virtscsi_host_template_multi = {
@@ -844,6 +790,7 @@ static struct scsi_host_template virtscsi_host_template_multi = {
  .target_destroy = virtscsi_target_destroy,
  .map_queues = virtscsi_map_queues,
  .track_queue_depth = 1,
+ .force_blk_mq = 1,
 };
 
 #define virtscsi_config_get(vdev, fld) \
--
2.17.0


[Bionic][PATCH v2 21/21] scsi: virtio_scsi: unify scsi_host_template

Jose Ricardo Ziviani
From: Ming Lei <[hidden email]>

Now that virtio_scsi uses blk-mq exclusively, the single-queue and
multi-queue cases can be handled by one .queuecommand callback, so merge the
two scsi_host_templates into one and drop the legacy-path plumbing.
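
This works because the hardware-queue index is recovered from the request
tag, so a single-queue host simply resolves to req_vqs[0] and needs no
special casing. A minimal sketch of that derivation, simplified from the
blk-mq helpers and shown only for illustration:

	/* the hardware queue number lives in the upper 16 bits of the
	 * unique tag (see include/linux/blk-mq.h)
	 */
	u32 unique = blk_mq_unique_tag(sc->request);   /* (hwq << 16) | tag */
	u16 hwq    = blk_mq_unique_tag_to_hwq(unique); /* unique >> 16      */
	vq = &vscsi->req_vqs[hwq];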

[mkp: commit desc]

Cc: Omar Sandoval <[hidden email]>,
Cc: "Martin K. Petersen" <[hidden email]>,
Cc: James Bottomley <[hidden email]>,
Cc: Christoph Hellwig <[hidden email]>,
Cc: Paolo Bonzini <[hidden email]>
Cc: Mike Snitzer <[hidden email]>
Cc: Laurence Oberman <[hidden email]>
Cc: Hannes Reinecke <[hidden email]>
Signed-off-by: Ming Lei <[hidden email]>
Suggested-by: Christoph Hellwig <[hidden email]>,
Reviewed-by: Christoph Hellwig <[hidden email]>
Signed-off-by: Martin K. Petersen <[hidden email]>
(cherry picked from commit c3506df85091ab41cf7716244c460e15136100c4)
Signed-off-by: Jose Ricardo Ziviani <[hidden email]>
---
 drivers/scsi/virtio_scsi.c | 74 ++++++++------------------------------
 1 file changed, 15 insertions(+), 59 deletions(-)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 54e3a0f6844c..45d04631888a 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -522,11 +522,20 @@ static void virtio_scsi_init_hdr_pi(struct virtio_device *vdev,
 }
 #endif
 
-static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
- struct virtio_scsi_vq *req_vq,
+static struct virtio_scsi_vq *virtscsi_pick_vq_mq(struct virtio_scsi *vscsi,
+  struct scsi_cmnd *sc)
+{
+ u32 tag = blk_mq_unique_tag(sc->request);
+ u16 hwq = blk_mq_unique_tag_to_hwq(tag);
+
+ return &vscsi->req_vqs[hwq];
+}
+
+static int virtscsi_queuecommand(struct Scsi_Host *shost,
  struct scsi_cmnd *sc)
 {
- struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
+ struct virtio_scsi *vscsi = shost_priv(shost);
+ struct virtio_scsi_vq *req_vq = virtscsi_pick_vq_mq(vscsi, sc);
  struct virtio_scsi_cmd *cmd = scsi_cmd_priv(sc);
  unsigned long flags;
  int req_size;
@@ -569,32 +578,6 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
  return 0;
 }
 
-static int virtscsi_queuecommand_single(struct Scsi_Host *sh,
- struct scsi_cmnd *sc)
-{
- struct virtio_scsi *vscsi = shost_priv(sh);
-
- return virtscsi_queuecommand(vscsi, &vscsi->req_vqs[0], sc);
-}
-
-static struct virtio_scsi_vq *virtscsi_pick_vq_mq(struct virtio_scsi *vscsi,
-  struct scsi_cmnd *sc)
-{
- u32 tag = blk_mq_unique_tag(sc->request);
- u16 hwq = blk_mq_unique_tag_to_hwq(tag);
-
- return &vscsi->req_vqs[hwq];
-}
-
-static int virtscsi_queuecommand_multi(struct Scsi_Host *sh,
-       struct scsi_cmnd *sc)
-{
- struct virtio_scsi *vscsi = shost_priv(sh);
- struct virtio_scsi_vq *req_vq = virtscsi_pick_vq_mq(vscsi, sc);
-
- return virtscsi_queuecommand(vscsi, req_vq, sc);
-}
-
 static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd)
 {
  DECLARE_COMPLETION_ONSTACK(comp);
@@ -750,34 +733,13 @@ static enum blk_eh_timer_return virtscsi_eh_timed_out(struct scsi_cmnd *scmnd)
  return BLK_EH_RESET_TIMER;
 }
 
-static struct scsi_host_template virtscsi_host_template_single = {
+static struct scsi_host_template virtscsi_host_template = {
  .module = THIS_MODULE,
  .name = "Virtio SCSI HBA",
  .proc_name = "virtio_scsi",
  .this_id = -1,
  .cmd_size = sizeof(struct virtio_scsi_cmd),
- .queuecommand = virtscsi_queuecommand_single,
- .change_queue_depth = virtscsi_change_queue_depth,
- .eh_abort_handler = virtscsi_abort,
- .eh_device_reset_handler = virtscsi_device_reset,
- .eh_timed_out = virtscsi_eh_timed_out,
- .slave_alloc = virtscsi_device_alloc,
-
- .dma_boundary = UINT_MAX,
- .use_clustering = ENABLE_CLUSTERING,
- .target_alloc = virtscsi_target_alloc,
- .target_destroy = virtscsi_target_destroy,
- .track_queue_depth = 1,
- .force_blk_mq = 1,
-};
-
-static struct scsi_host_template virtscsi_host_template_multi = {
- .module = THIS_MODULE,
- .name = "Virtio SCSI HBA",
- .proc_name = "virtio_scsi",
- .this_id = -1,
- .cmd_size = sizeof(struct virtio_scsi_cmd),
- .queuecommand = virtscsi_queuecommand_multi,
+ .queuecommand = virtscsi_queuecommand,
  .change_queue_depth = virtscsi_change_queue_depth,
  .eh_abort_handler = virtscsi_abort,
  .eh_device_reset_handler = virtscsi_device_reset,
@@ -883,7 +845,6 @@ static int virtscsi_probe(struct virtio_device *vdev)
  u32 sg_elems, num_targets;
  u32 cmd_per_lun;
  u32 num_queues;
- struct scsi_host_template *hostt;
 
  if (!vdev->config->get) {
  dev_err(&vdev->dev, "%s failure: config access disabled\n",
@@ -896,12 +857,7 @@ static int virtscsi_probe(struct virtio_device *vdev)
 
  num_targets = virtscsi_config_get(vdev, max_target) + 1;
 
- if (num_queues == 1)
- hostt = &virtscsi_host_template_single;
- else
- hostt = &virtscsi_host_template_multi;
-
- shost = scsi_host_alloc(hostt,
+ shost = scsi_host_alloc(&virtscsi_host_template,
  sizeof(*vscsi) + sizeof(vscsi->req_vqs[0]) * num_queues);
  if (!shost)
  return -ENOMEM;
--
2.17.0


Re: [Bionic][PATCH v2 00/21] blk-mq scheduler fixes

Kleber Souza
On 05/10/18 18:23, Jose Ricardo Ziviani wrote:

> From: Jose Ricardo Ziviani <[hidden email]>
>
> Hello team!
>
> Weeks ago I sent a patchset with:
>  * genirq/affinity: Spread irq vectors among present CPUs as far as possible
>  * blk-mq: simplify queue mapping & schedule with each possisble CPU
>
> Unfortunately they broke some cases, in particular the hpsa driver, so they
> had to be reverted. However, the bugs in blk-mq lead to very unstable
> KVM/QEMU virtual machines whenever CPU hotplug/unplug or SMT changes are
> performed, and they also impact live migration.
>
> So, this is a new attempt to have the patches included. This new version
> includes all related fixes available upstream.
>
> It's based on Ubuntu-4.15.0-21.22 tag.
>
> Thank you!
>
> Jose Ricardo Ziviani

Hi Ziviani,

The patches are missing the BugLink reference. From the history, I assume
they are the follow-up fixes for LP: #1759723.

This is a large number of patches touching some sensitive subsystems, which
makes it hard to guarantee that there are no regressions. Would you be able
to identify a smaller subset of these patches that fixes the problem you are
facing?


Thanks,
Kleber


>
> Bart Van Assche (1):
>   blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended
>     delays
>
> Christoph Hellwig (2):
>   genirq/affinity: assign vectors to all possible CPUs
>   blk-mq: simplify queue mapping & schedule with each possisble CPU
>
> Jens Axboe (1):
>   blk-mq: fix discard merge with scheduler attached
>
> Ming Lei (16):
>   genirq/affinity: Rename *node_to_possible_cpumask as *node_to_cpumask
>   genirq/affinity: Move actual irq vector spreading into a helper
>     function
>   genirq/affinity: Allow irq spreading from a given starting point
>   genirq/affinity: Spread irq vectors among present CPUs as far as
>     possible
>   blk-mq: make sure hctx->next_cpu is set correctly
>   blk-mq: make sure that correct hctx->next_cpu is set
>   blk-mq: don't keep offline CPUs mapped to hctx 0
>   blk-mq: avoid to write intermediate result to hctx->next_cpu
>   blk-mq: introduce blk_mq_hw_queue_first_cpu() to figure out first cpu
>   blk-mq: don't check queue mapped in __blk_mq_delay_run_hw_queue()
>   nvme: pci: pass max vectors as num_possible_cpus() to
>     pci_alloc_irq_vectors
>   scsi: hpsa: fix selection of reply queue
>   scsi: megaraid_sas: fix selection of reply queue
>   scsi: core: introduce force_blk_mq
>   scsi: virtio_scsi: fix IO hang caused by automatic irq vector affinity
>   scsi: virtio_scsi: unify scsi_host_template
>
> Thomas Gleixner (1):
>   genirq/affinity: Don't return with empty affinity masks on error
>
>  block/blk-core.c                            |   2 +
>  block/blk-merge.c                           |  29 +++-
>  block/blk-mq-cpumap.c                       |   5 -
>  block/blk-mq.c                              |  65 +++++---
>  drivers/nvme/host/pci.c                     |   2 +-
>  drivers/scsi/hosts.c                        |   1 +
>  drivers/scsi/hpsa.c                         |  73 ++++++---
>  drivers/scsi/hpsa.h                         |   1 +
>  drivers/scsi/megaraid/megaraid_sas.h        |   1 +
>  drivers/scsi/megaraid/megaraid_sas_base.c   |  39 ++++-
>  drivers/scsi/megaraid/megaraid_sas_fusion.c |  12 +-
>  drivers/scsi/virtio_scsi.c                  | 129 ++-------------
>  include/scsi/scsi_host.h                    |   3 +
>  kernel/irq/affinity.c                       | 166 +++++++++++++-------
>  14 files changed, 296 insertions(+), 232 deletions(-)
>

Re: [Bionic][PATCH v2 00/21] blk-mq scheduler fixes

joserz
Hello Kleber!

I've sent the 3rd version with the BugLink reference included. I'll also
check whether I can remove some commits from the set to make it less
complex.

Thank you!

On Wed, May 16, 2018 at 12:01:03PM +0200, Kleber Souza wrote:

> On 05/10/18 18:23, Jose Ricardo Ziviani wrote:
> > From: Jose Ricardo Ziviani <[hidden email]>
> >
> > Hello team!
> >
> > Weeks ago I sent a patchset with:
> >  * genirq/affinity: Spread irq vectors among present CPUs as far as possible
> >  * blk-mq: simplify queue mapping & schedule with each possisble CPU
> >
> > Unfortunately they broke some cases, in particular the hpsa driver, so they
> > had to be reverted. However, the bugs in blk-mq lead to very unstable
> > KVM/QEMU virtual machines whenever CPU hotplug/unplug or SMT changes are
> > performed, and they also impact live migration.
> >
> > So, this is a new attempt to have the patches included. This new version
> > includes all related fixes available upstream.
> >
> > It's based on Ubuntu-4.15.0-21.22 tag.
> >
> > Thank you!
> >
> > Jose Ricardo Ziviani
>
> Hi Ziviani,
>
> The patches are missing the BugLink reference. From the history, I assume
> they are the follow-up fixes for LP: #1759723.
>
> This is a large number of patches touching some sensitive subsystems, which
> makes it hard to guarantee that there are no regressions. Would you be able
> to identify a smaller subset of these patches that fixes the problem you are
> facing?
>
>
> Thanks,
> Kleber
>
>
> >
> > Bart Van Assche (1):
> >   blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended
> >     delays
> >
> > Christoph Hellwig (2):
> >   genirq/affinity: assign vectors to all possible CPUs
> >   blk-mq: simplify queue mapping & schedule with each possisble CPU
> >
> > Jens Axboe (1):
> >   blk-mq: fix discard merge with scheduler attached
> >
> > Ming Lei (16):
> >   genirq/affinity: Rename *node_to_possible_cpumask as *node_to_cpumask
> >   genirq/affinity: Move actual irq vector spreading into a helper
> >     function
> >   genirq/affinity: Allow irq spreading from a given starting point
> >   genirq/affinity: Spread irq vectors among present CPUs as far as
> >     possible
> >   blk-mq: make sure hctx->next_cpu is set correctly
> >   blk-mq: make sure that correct hctx->next_cpu is set
> >   blk-mq: don't keep offline CPUs mapped to hctx 0
> >   blk-mq: avoid to write intermediate result to hctx->next_cpu
> >   blk-mq: introduce blk_mq_hw_queue_first_cpu() to figure out first cpu
> >   blk-mq: don't check queue mapped in __blk_mq_delay_run_hw_queue()
> >   nvme: pci: pass max vectors as num_possible_cpus() to
> >     pci_alloc_irq_vectors
> >   scsi: hpsa: fix selection of reply queue
> >   scsi: megaraid_sas: fix selection of reply queue
> >   scsi: core: introduce force_blk_mq
> >   scsi: virtio_scsi: fix IO hang caused by automatic irq vector affinity
> >   scsi: virtio_scsi: unify scsi_host_template
> >
> > Thomas Gleixner (1):
> >   genirq/affinity: Don't return with empty affinity masks on error
> >
> >  block/blk-core.c                            |   2 +
> >  block/blk-merge.c                           |  29 +++-
> >  block/blk-mq-cpumap.c                       |   5 -
> >  block/blk-mq.c                              |  65 +++++---
> >  drivers/nvme/host/pci.c                     |   2 +-
> >  drivers/scsi/hosts.c                        |   1 +
> >  drivers/scsi/hpsa.c                         |  73 ++++++---
> >  drivers/scsi/hpsa.h                         |   1 +
> >  drivers/scsi/megaraid/megaraid_sas.h        |   1 +
> >  drivers/scsi/megaraid/megaraid_sas_base.c   |  39 ++++-
> >  drivers/scsi/megaraid/megaraid_sas_fusion.c |  12 +-
> >  drivers/scsi/virtio_scsi.c                  | 129 ++-------------
> >  include/scsi/scsi_host.h                    |   3 +
> >  kernel/irq/affinity.c                       | 166 +++++++++++++-------
> >  14 files changed, 296 insertions(+), 232 deletions(-)
> >
>

