[c/azure][PATCH] scsi: storvsc: Fix a race in sub-channel creation that can cause panic

classic Classic list List threaded Threaded
4 messages Options
Reply | Threaded
Open this post in threaded view
|

[c/azure][PATCH] scsi: storvsc: Fix a race in sub-channel creation that can cause panic

Marcelo Henrique Cerri
From: Dexuan Cui <[hidden email]>

BugLink: http://bugs.launchpad.net/bugs/1823805

We can concurrently try to open the same sub-channel from 2 paths:

path #1: vmbus_onoffer() -> vmbus_process_offer() -> handle_sc_creation().
path #2: storvsc_probe() -> storvsc_connect_to_vsp() ->
         -> storvsc_channel_init() -> handle_multichannel_storage() ->
         -> vmbus_are_subchannels_present() -> handle_sc_creation().

They conflict with each other, but it was not an issue before the recent
commit ae6935ed7d42 ("vmbus: split ring buffer allocation from open"),
because at the beginning of vmbus_open() we checked newchannel->state so
only one path could succeed, and the other would return with -EINVAL.

After ae6935ed7d42, the failing path frees the channel's ringbuffer by
vmbus_free_ring(), and this causes a panic later.

Commit ae6935ed7d42 itself is good, and it just reveals the longstanding
race. We can resolve the issue by removing path #2, i.e. removing the
second vmbus_are_subchannels_present() in handle_multichannel_storage().

BTW, the comment "Check to see if sub-channels have already been created"
in handle_multichannel_storage() is incorrect: when we unload the driver,
we first close the sub-channel(s) and then close the primary channel, next
the host sends rescind-offer message(s) so primary->sc_list will become
empty. This means the first vmbus_are_subchannels_present() in
handle_multichannel_storage() is never useful.

Fixes: ae6935ed7d42 ("vmbus: split ring buffer allocation from open")
Cc: [hidden email]
Cc: Long Li <[hidden email]>
Cc: Stephen Hemminger <[hidden email]>
Cc: K. Y. Srinivasan <[hidden email]>
Cc: Haiyang Zhang <[hidden email]>
Signed-off-by: Dexuan Cui <[hidden email]>
Signed-off-by: K. Y. Srinivasan <[hidden email]>
Signed-off-by: Martin K. Petersen <[hidden email]>
(cherry picked from commit c967590457cae5ba4f018704c341641bdcecfdcf)
Signed-off-by: Marcelo Henrique Cerri <[hidden email]>
---
 drivers/scsi/storvsc_drv.c | 61 +++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 31 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 883bff0ba3ce..57fb1b067b33 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -446,7 +446,6 @@ struct storvsc_device {
 
  bool destroy;
  bool drain_notify;
- bool open_sub_channel;
  atomic_t num_outstanding_req;
  struct Scsi_Host *host;
 
@@ -636,33 +635,38 @@ static inline struct storvsc_device *get_in_stor_device(
 static void handle_sc_creation(struct vmbus_channel *new_sc)
 {
  struct hv_device *device = new_sc->primary_channel->device_obj;
+ struct device *dev = &device->device;
  struct storvsc_device *stor_device;
  struct vmstorage_channel_properties props;
+ int ret;
 
  stor_device = get_out_stor_device(device);
  if (!stor_device)
  return;
 
- if (stor_device->open_sub_channel == false)
- return;
-
  memset(&props, 0, sizeof(struct vmstorage_channel_properties));
 
- vmbus_open(new_sc,
-   storvsc_ringbuffer_size,
-   storvsc_ringbuffer_size,
-   (void *)&props,
-   sizeof(struct vmstorage_channel_properties),
-   storvsc_on_channel_callback, new_sc);
+ ret = vmbus_open(new_sc,
+ storvsc_ringbuffer_size,
+ storvsc_ringbuffer_size,
+ (void *)&props,
+ sizeof(struct vmstorage_channel_properties),
+ storvsc_on_channel_callback, new_sc);
 
- if (new_sc->state == CHANNEL_OPENED_STATE) {
- stor_device->stor_chns[new_sc->target_cpu] = new_sc;
- cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
+ /* In case vmbus_open() fails, we don't use the sub-channel. */
+ if (ret != 0) {
+ dev_err(dev, "Failed to open sub-channel: err=%d\n", ret);
+ return;
  }
+
+ /* Add the sub-channel to the array of available channels. */
+ stor_device->stor_chns[new_sc->target_cpu] = new_sc;
+ cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
 }
 
 static void  handle_multichannel_storage(struct hv_device *device, int max_chns)
 {
+ struct device *dev = &device->device;
  struct storvsc_device *stor_device;
  int num_cpus = num_online_cpus();
  int num_sc;
@@ -679,21 +683,11 @@ static void  handle_multichannel_storage(struct hv_device *device, int max_chns)
  request = &stor_device->init_request;
  vstor_packet = &request->vstor_packet;
 
- stor_device->open_sub_channel = true;
  /*
  * Establish a handler for dealing with subchannels.
  */
  vmbus_set_sc_create_callback(device->channel, handle_sc_creation);
 
- /*
- * Check to see if sub-channels have already been created. This
- * can happen when this driver is re-loaded after unloading.
- */
-
- if (vmbus_are_subchannels_present(device->channel))
- return;
-
- stor_device->open_sub_channel = false;
  /*
  * Request the host to create sub-channels.
  */
@@ -710,23 +704,29 @@ static void  handle_multichannel_storage(struct hv_device *device, int max_chns)
        VM_PKT_DATA_INBAND,
        VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
- if (ret != 0)
+ if (ret != 0) {
+ dev_err(dev, "Failed to create sub-channel: err=%d\n", ret);
  return;
+ }
 
  t = wait_for_completion_timeout(&request->wait_event, 10*HZ);
- if (t == 0)
+ if (t == 0) {
+ dev_err(dev, "Failed to create sub-channel: timed out\n");
  return;
+ }
 
  if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO ||
-    vstor_packet->status != 0)
+    vstor_packet->status != 0) {
+ dev_err(dev, "Failed to create sub-channel: op=%d, sts=%d\n",
+ vstor_packet->operation, vstor_packet->status);
  return;
+ }
 
  /*
- * Now that we created the sub-channels, invoke the check; this
- * may trigger the callback.
+ * We need to do nothing here, because vmbus_process_offer()
+ * invokes channel->sc_creation_callback, which will open and use
+ * the sub-channel(s).
  */
- stor_device->open_sub_channel = true;
- vmbus_are_subchannels_present(device->channel);
 }
 
 static void cache_wwn(struct storvsc_device *stor_device,
@@ -1794,7 +1794,6 @@ static int storvsc_probe(struct hv_device *device,
  }
 
  stor_device->destroy = false;
- stor_device->open_sub_channel = false;
  init_waitqueue_head(&stor_device->waiting_to_drain);
  stor_device->device = device;
  stor_device->host = host;
--
2.17.1


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|

ACK: [c/azure][PATCH] scsi: storvsc: Fix a race in sub-channel creation that can cause panic

Seth Forshee
On Tue, Apr 09, 2019 at 10:55:27AM -0300, Marcelo Henrique Cerri wrote:

> From: Dexuan Cui <[hidden email]>
>
> BugLink: http://bugs.launchpad.net/bugs/1823805
>
> We can concurrently try to open the same sub-channel from 2 paths:
>
> path #1: vmbus_onoffer() -> vmbus_process_offer() -> handle_sc_creation().
> path #2: storvsc_probe() -> storvsc_connect_to_vsp() ->
> -> storvsc_channel_init() -> handle_multichannel_storage() ->
> -> vmbus_are_subchannels_present() -> handle_sc_creation().
>
> They conflict with each other, but it was not an issue before the recent
> commit ae6935ed7d42 ("vmbus: split ring buffer allocation from open"),
> because at the beginning of vmbus_open() we checked newchannel->state so
> only one path could succeed, and the other would return with -EINVAL.
>
> After ae6935ed7d42, the failing path frees the channel's ringbuffer by
> vmbus_free_ring(), and this causes a panic later.
>
> Commit ae6935ed7d42 itself is good, and it just reveals the longstanding
> race. We can resolve the issue by removing path #2, i.e. removing the
> second vmbus_are_subchannels_present() in handle_multichannel_storage().
>
> BTW, the comment "Check to see if sub-channels have already been created"
> in handle_multichannel_storage() is incorrect: when we unload the driver,
> we first close the sub-channel(s) and then close the primary channel, next
> the host sends rescind-offer message(s) so primary->sc_list will become
> empty. This means the first vmbus_are_subchannels_present() in
> handle_multichannel_storage() is never useful.
>
> Fixes: ae6935ed7d42 ("vmbus: split ring buffer allocation from open")
> Cc: [hidden email]
> Cc: Long Li <[hidden email]>
> Cc: Stephen Hemminger <[hidden email]>
> Cc: K. Y. Srinivasan <[hidden email]>
> Cc: Haiyang Zhang <[hidden email]>
> Signed-off-by: Dexuan Cui <[hidden email]>
> Signed-off-by: K. Y. Srinivasan <[hidden email]>
> Signed-off-by: Martin K. Petersen <[hidden email]>
> (cherry picked from commit c967590457cae5ba4f018704c341641bdcecfdcf)
> Signed-off-by: Marcelo Henrique Cerri <[hidden email]>

Upstream cherry pick which has been included in stable releases, fixes a
serious issue.

Acked-by: Seth Forshee <[hidden email]>

--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|

ACK/Cmnt: [c/azure][PATCH] scsi: storvsc: Fix a race in sub-channel creation that can cause panic

Stefan Bader-2
In reply to this post by Marcelo Henrique Cerri
On 09.04.19 15:55, Marcelo Henrique Cerri wrote:

> From: Dexuan Cui <[hidden email]>
>
> BugLink: http://bugs.launchpad.net/bugs/1823805
>
> We can concurrently try to open the same sub-channel from 2 paths:
>
> path #1: vmbus_onoffer() -> vmbus_process_offer() -> handle_sc_creation().
> path #2: storvsc_probe() -> storvsc_connect_to_vsp() ->
> -> storvsc_channel_init() -> handle_multichannel_storage() ->
> -> vmbus_are_subchannels_present() -> handle_sc_creation().
>
> They conflict with each other, but it was not an issue before the recent
> commit ae6935ed7d42 ("vmbus: split ring buffer allocation from open"),
> because at the beginning of vmbus_open() we checked newchannel->state so
> only one path could succeed, and the other would return with -EINVAL.
>
> After ae6935ed7d42, the failing path frees the channel's ringbuffer by
> vmbus_free_ring(), and this causes a panic later.
>
> Commit ae6935ed7d42 itself is good, and it just reveals the longstanding
> race. We can resolve the issue by removing path #2, i.e. removing the
> second vmbus_are_subchannels_present() in handle_multichannel_storage().
>
> BTW, the comment "Check to see if sub-channels have already been created"
> in handle_multichannel_storage() is incorrect: when we unload the driver,
> we first close the sub-channel(s) and then close the primary channel, next
> the host sends rescind-offer message(s) so primary->sc_list will become
> empty. This means the first vmbus_are_subchannels_present() in
> handle_multichannel_storage() is never useful.
>
> Fixes: ae6935ed7d42 ("vmbus: split ring buffer allocation from open")
> Cc: [hidden email]
> Cc: Long Li <[hidden email]>
> Cc: Stephen Hemminger <[hidden email]>
> Cc: K. Y. Srinivasan <[hidden email]>
> Cc: Haiyang Zhang <[hidden email]>
> Signed-off-by: Dexuan Cui <[hidden email]>
> Signed-off-by: K. Y. Srinivasan <[hidden email]>
> Signed-off-by: Martin K. Petersen <[hidden email]>
> (cherry picked from commit c967590457cae5ba4f018704c341641bdcecfdcf)
> Signed-off-by: Marcelo Henrique Cerri <[hidden email]>
Acked-by: Stefan Bader <[hidden email]>
> ---

If that is not already done

>  drivers/scsi/storvsc_drv.c | 61 +++++++++++++++++++-------------------
>  1 file changed, 30 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> index 883bff0ba3ce..57fb1b067b33 100644
> --- a/drivers/scsi/storvsc_drv.c
> +++ b/drivers/scsi/storvsc_drv.c
> @@ -446,7 +446,6 @@ struct storvsc_device {
>  
>   bool destroy;
>   bool drain_notify;
> - bool open_sub_channel;
>   atomic_t num_outstanding_req;
>   struct Scsi_Host *host;
>  
> @@ -636,33 +635,38 @@ static inline struct storvsc_device *get_in_stor_device(
>  static void handle_sc_creation(struct vmbus_channel *new_sc)
>  {
>   struct hv_device *device = new_sc->primary_channel->device_obj;
> + struct device *dev = &device->device;
>   struct storvsc_device *stor_device;
>   struct vmstorage_channel_properties props;
> + int ret;
>  
>   stor_device = get_out_stor_device(device);
>   if (!stor_device)
>   return;
>  
> - if (stor_device->open_sub_channel == false)
> - return;
> -
>   memset(&props, 0, sizeof(struct vmstorage_channel_properties));
>  
> - vmbus_open(new_sc,
> -   storvsc_ringbuffer_size,
> -   storvsc_ringbuffer_size,
> -   (void *)&props,
> -   sizeof(struct vmstorage_channel_properties),
> -   storvsc_on_channel_callback, new_sc);
> + ret = vmbus_open(new_sc,
> + storvsc_ringbuffer_size,
> + storvsc_ringbuffer_size,
> + (void *)&props,
> + sizeof(struct vmstorage_channel_properties),
> + storvsc_on_channel_callback, new_sc);
>  
> - if (new_sc->state == CHANNEL_OPENED_STATE) {
> - stor_device->stor_chns[new_sc->target_cpu] = new_sc;
> - cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
> + /* In case vmbus_open() fails, we don't use the sub-channel. */
> + if (ret != 0) {
> + dev_err(dev, "Failed to open sub-channel: err=%d\n", ret);
> + return;
>   }
> +
> + /* Add the sub-channel to the array of available channels. */
> + stor_device->stor_chns[new_sc->target_cpu] = new_sc;
> + cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
>  }
>  
>  static void  handle_multichannel_storage(struct hv_device *device, int max_chns)
>  {
> + struct device *dev = &device->device;
>   struct storvsc_device *stor_device;
>   int num_cpus = num_online_cpus();
>   int num_sc;
> @@ -679,21 +683,11 @@ static void  handle_multichannel_storage(struct hv_device *device, int max_chns)
>   request = &stor_device->init_request;
>   vstor_packet = &request->vstor_packet;
>  
> - stor_device->open_sub_channel = true;
>   /*
>   * Establish a handler for dealing with subchannels.
>   */
>   vmbus_set_sc_create_callback(device->channel, handle_sc_creation);
>  
> - /*
> - * Check to see if sub-channels have already been created. This
> - * can happen when this driver is re-loaded after unloading.
> - */
> -
> - if (vmbus_are_subchannels_present(device->channel))
> - return;
> -
> - stor_device->open_sub_channel = false;
>   /*
>   * Request the host to create sub-channels.
>   */
> @@ -710,23 +704,29 @@ static void  handle_multichannel_storage(struct hv_device *device, int max_chns)
>         VM_PKT_DATA_INBAND,
>         VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
>  
> - if (ret != 0)
> + if (ret != 0) {
> + dev_err(dev, "Failed to create sub-channel: err=%d\n", ret);
>   return;
> + }
>  
>   t = wait_for_completion_timeout(&request->wait_event, 10*HZ);
> - if (t == 0)
> + if (t == 0) {
> + dev_err(dev, "Failed to create sub-channel: timed out\n");
>   return;
> + }
>  
>   if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO ||
> -    vstor_packet->status != 0)
> +    vstor_packet->status != 0) {
> + dev_err(dev, "Failed to create sub-channel: op=%d, sts=%d\n",
> + vstor_packet->operation, vstor_packet->status);
>   return;
> + }
>  
>   /*
> - * Now that we created the sub-channels, invoke the check; this
> - * may trigger the callback.
> + * We need to do nothing here, because vmbus_process_offer()
> + * invokes channel->sc_creation_callback, which will open and use
> + * the sub-channel(s).
>   */
> - stor_device->open_sub_channel = true;
> - vmbus_are_subchannels_present(device->channel);
>  }
>  
>  static void cache_wwn(struct storvsc_device *stor_device,
> @@ -1794,7 +1794,6 @@ static int storvsc_probe(struct hv_device *device,
>   }
>  
>   stor_device->destroy = false;
> - stor_device->open_sub_channel = false;
>   init_waitqueue_head(&stor_device->waiting_to_drain);
>   stor_device->device = device;
>   stor_device->host = host;
>


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team

signature.asc (849 bytes) Download Attachment
Reply | Threaded
Open this post in threaded view
|

APPLIED: [c/azure][PATCH] scsi: storvsc: Fix a race in sub-channel creation that can cause panic

Marcelo Henrique Cerri
In reply to this post by Marcelo Henrique Cerri
--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team

signature.asc (499 bytes) Download Attachment