[SRU bionic/master-next 1/1 V2] UBUNTU: SAUCE: base/dd: limit release function changes to vfio driver only

classic Classic list List threaded Threaded
5 messages Options
Reply | Threaded
Open this post in threaded view
|

[SRU bionic/master-next 1/1 V2] UBUNTU: SAUCE: base/dd: limit release function changes to vfio driver only

Andy Whitcroft-3
It seems that we can trigger the new race detection after remove()
with some drivers which clear the driver as they unreference
the module.  This leads us to fail to clear down those devices which
triggers suspend/resume issues.

Limit the core changes to only apply to the vfio driver while we
work with upstream on a more generic fix.

Fixes: 876dcb5f4576 ("UBUNTU: SAUCE: vfio -- release device lock before userspace requests")
BugLink: http://bugs.launchpad.net/bugs/1803942
Signed-off-by: Andy Whitcroft <[hidden email]>
---
 drivers/base/dd.c           | 10 +++++++++-
 drivers/vfio/pci/vfio_pci.c |  6 ++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 37c01054521b..045d4e4485b8 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -820,6 +820,9 @@ int driver_attach(struct device_driver *drv)
 }
 EXPORT_SYMBOL_GPL(driver_attach);
 
+void *vfio_pci_driver_ptr = (void *)0xdeadfeed;
+EXPORT_SYMBOL(vfio_pci_driver_ptr);
+
 /*
  * __device_release_driver() must be called with @dev lock held.
  * When called for a USB interface, @dev->parent lock must be held as well.
@@ -872,8 +875,13 @@ static void __device_release_driver(struct device *dev, struct device *parent)
  * A concurrent invocation of the same function might
  * have released the driver successfully while this one
  * was waiting, so check for that.
+ * LP: #1792099
+ *
+ * Limit this to the vfio_pci_driver as some drivers NULL
+ * out this pointer in their remove() function.
+ * LP: #1803942
  */
- if (dev->driver != drv)
+ if (drv == vfio_pci_driver_ptr && dev->driver != drv)
  return;
 
  device_links_driver_cleanup(dev);
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index f041b1a6cf66..6e70281715c4 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1310,6 +1310,7 @@ static struct pci_driver vfio_pci_driver = {
  .remove = vfio_pci_remove,
  .err_handler = &vfio_err_handlers,
 };
+extern void *vfio_pci_driver_ptr;
 
 struct vfio_devices {
  struct vfio_device **devices;
@@ -1404,6 +1405,8 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
 
 static void __exit vfio_pci_cleanup(void)
 {
+ vfio_pci_driver_ptr = (void *)0xdeadfeed;
+
  pci_unregister_driver(&vfio_pci_driver);
  vfio_pci_uninit_perm_bits();
 }
@@ -1465,6 +1468,9 @@ static int __init vfio_pci_init(void)
 
  vfio_pci_fill_ids();
 
+ /* Advertise my address. */
+ vfio_pci_driver_ptr = &vfio_pci_driver;
+
  return 0;
 
 out_driver:
--
2.19.1


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|

ACK: [SRU bionic/master-next 1/1 V2] UBUNTU: SAUCE: base/dd: limit release function changes to vfio driver only

Colin Ian King-2
On 06/12/2018 12:32, Andy Whitcroft wrote:

> It seems that we can trigger the new race detection after remove()
> with some drivers which clear the driver as they unreference
> the module.  This leads us to fail to clear down those devices which
> triggers suspend/resume issues.
>
> Limit the core changes to only apply to the vfio driver while we
> work with upstream on a more generic fix.
>
> Fixes: 876dcb5f4576 ("UBUNTU: SAUCE: vfio -- release device lock before userspace requests")
> BugLink: http://bugs.launchpad.net/bugs/1803942
> Signed-off-by: Andy Whitcroft <[hidden email]>
> ---
>  drivers/base/dd.c           | 10 +++++++++-
>  drivers/vfio/pci/vfio_pci.c |  6 ++++++
>  2 files changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index 37c01054521b..045d4e4485b8 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -820,6 +820,9 @@ int driver_attach(struct device_driver *drv)
>  }
>  EXPORT_SYMBOL_GPL(driver_attach);
>  
> +void *vfio_pci_driver_ptr = (void *)0xdeadfeed;
> +EXPORT_SYMBOL(vfio_pci_driver_ptr);
> +
>  /*
>   * __device_release_driver() must be called with @dev lock held.
>   * When called for a USB interface, @dev->parent lock must be held as well.
> @@ -872,8 +875,13 @@ static void __device_release_driver(struct device *dev, struct device *parent)
>   * A concurrent invocation of the same function might
>   * have released the driver successfully while this one
>   * was waiting, so check for that.
> + * LP: #1792099
> + *
> + * Limit this to the vfio_pci_driver as some drivers NULL
> + * out this pointer in their remove() function.
> + * LP: #1803942
>   */
> - if (dev->driver != drv)
> + if (drv == vfio_pci_driver_ptr && dev->driver != drv)
>   return;
>  
>   device_links_driver_cleanup(dev);
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index f041b1a6cf66..6e70281715c4 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -1310,6 +1310,7 @@ static struct pci_driver vfio_pci_driver = {
>   .remove = vfio_pci_remove,
>   .err_handler = &vfio_err_handlers,
>  };
> +extern void *vfio_pci_driver_ptr;
>  
>  struct vfio_devices {
>   struct vfio_device **devices;
> @@ -1404,6 +1405,8 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
>  
>  static void __exit vfio_pci_cleanup(void)
>  {
> + vfio_pci_driver_ptr = (void *)0xdeadfeed;

Out of interest, is there any reason why NULL wasn't used apart from we
can identify these faults easier because of this specific address?

> +
>   pci_unregister_driver(&vfio_pci_driver);
>   vfio_pci_uninit_perm_bits();
>  }
> @@ -1465,6 +1468,9 @@ static int __init vfio_pci_init(void)
>  
>   vfio_pci_fill_ids();
>  
> + /* Advertise my address. */
> + vfio_pci_driver_ptr = &vfio_pci_driver;
> +
>   return 0;
>  
>  out_driver:
>

As a temporary workaround this seems OK to me, but only as a workaround.
I'm assuming that this will be replaced by the official upstream fix at
a later date.

Acked-by: Colin Ian King <[hidden email]>


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|

ACK/Cmnt: [SRU bionic/master-next 1/1 V2] UBUNTU: SAUCE: base/dd: limit release function changes to vfio driver only

Stefan Bader-2
In reply to this post by Andy Whitcroft-3
On 06.12.18 13:32, Andy Whitcroft wrote:

> It seems that we can trigger the new race detection after remove()
> with some drivers which clear the driver as they unreference
> the module.  This leads us to fail to clear down those devices which
> triggers suspend/resume issues.
>
> Limit the core changes to only apply to the vfio driver while we
> work with upstream on a more generic fix.
>
> Fixes: 876dcb5f4576 ("UBUNTU: SAUCE: vfio -- release device lock before userspace requests")
> BugLink: http://bugs.launchpad.net/bugs/1803942
> Signed-off-by: Andy Whitcroft <[hidden email]>
Acked-by: Stefan Bader <[hidden email]>
> ---

I guess this is better as the variable is now located in the base driver and
thus exists regardless of the vfio driver being loaded. Not sure whether the
pci_unregister_driver call in vfio would still want the pointer set. But if that
sounds ok to you I am good with that.

-Stefan

>  drivers/base/dd.c           | 10 +++++++++-
>  drivers/vfio/pci/vfio_pci.c |  6 ++++++
>  2 files changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index 37c01054521b..045d4e4485b8 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -820,6 +820,9 @@ int driver_attach(struct device_driver *drv)
>  }
>  EXPORT_SYMBOL_GPL(driver_attach);
>  
> +void *vfio_pci_driver_ptr = (void *)0xdeadfeed;
> +EXPORT_SYMBOL(vfio_pci_driver_ptr);
> +
>  /*
>   * __device_release_driver() must be called with @dev lock held.
>   * When called for a USB interface, @dev->parent lock must be held as well.
> @@ -872,8 +875,13 @@ static void __device_release_driver(struct device *dev, struct device *parent)
>   * A concurrent invocation of the same function might
>   * have released the driver successfully while this one
>   * was waiting, so check for that.
> + * LP: #1792099
> + *
> + * Limit this to the vfio_pci_driver as some drivers NULL
> + * out this pointer in their remove() function.
> + * LP: #1803942
>   */
> - if (dev->driver != drv)
> + if (drv == vfio_pci_driver_ptr && dev->driver != drv)
>   return;
>  
>   device_links_driver_cleanup(dev);
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index f041b1a6cf66..6e70281715c4 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -1310,6 +1310,7 @@ static struct pci_driver vfio_pci_driver = {
>   .remove = vfio_pci_remove,
>   .err_handler = &vfio_err_handlers,
>  };
> +extern void *vfio_pci_driver_ptr;
>  
>  struct vfio_devices {
>   struct vfio_device **devices;
> @@ -1404,6 +1405,8 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
>  
>  static void __exit vfio_pci_cleanup(void)
>  {
> + vfio_pci_driver_ptr = (void *)0xdeadfeed;
> +
>   pci_unregister_driver(&vfio_pci_driver);
>   vfio_pci_uninit_perm_bits();
>  }
> @@ -1465,6 +1468,9 @@ static int __init vfio_pci_init(void)
>  
>   vfio_pci_fill_ids();
>  
> + /* Advertise my address. */
> + vfio_pci_driver_ptr = &vfio_pci_driver;
> +
>   return 0;
>  
>  out_driver:
>


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team

signature.asc (836 bytes) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: ACK/Cmnt: [SRU bionic/master-next 1/1 V2] UBUNTU: SAUCE: base/dd: limit release function changes to vfio driver only

Andy Whitcroft-3
On Thu, Dec 06, 2018 at 01:52:55PM +0100, Stefan Bader wrote:

> On 06.12.18 13:32, Andy Whitcroft wrote:
> > It seems that we can trigger the new race detection after remove()
> > with some drivers which clear the driver as they unreference
> > the module.  This leads us to fail to clear down those devices which
> > triggers suspend/resume issues.
> >
> > Limit the core changes to only apply to the vfio driver while we
> > work with upstream on a more generic fix.
> >
> > Fixes: 876dcb5f4576 ("UBUNTU: SAUCE: vfio -- release device lock before userspace requests")
> > BugLink: http://bugs.launchpad.net/bugs/1803942
> > Signed-off-by: Andy Whitcroft <[hidden email]>
> Acked-by: Stefan Bader <[hidden email]>
> > ---
>
> I guess this is better as the variable is now located in the base driver and
> thus exists regardless of the vfio driver being loaded. Not sure whether the
> pci_unregister_driver call in vfio would still want the pointer set. But if that
> sounds ok to you I am good with that.

I do not believe it needs it.  This is not something many configurations
can even trigger and removing the driver is not something most people
would do in the normal run of things.

-apw

--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|

APPLIED: [SRU bionic/master-next 1/1 V2] UBUNTU: SAUCE: base/dd: limit release function changes to vfio driver only

Kleber Souza
In reply to this post by Andy Whitcroft-3
On 12/6/18 1:32 PM, Andy Whitcroft wrote:

> It seems that we can trigger the new race detection after remove()
> with some drivers which clear the driver as they unreference
> the module.  This leads us to fail to clear down those devices which
> triggers suspend/resume issues.
>
> Limit the core changes to only apply to the vfio driver while we
> work with upstream on a more generic fix.
>
> Fixes: 876dcb5f4576 ("UBUNTU: SAUCE: vfio -- release device lock before userspace requests")
> BugLink: http://bugs.launchpad.net/bugs/1803942
> Signed-off-by: Andy Whitcroft <[hidden email]>
> ---
>  drivers/base/dd.c           | 10 +++++++++-
>  drivers/vfio/pci/vfio_pci.c |  6 ++++++
>  2 files changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index 37c01054521b..045d4e4485b8 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -820,6 +820,9 @@ int driver_attach(struct device_driver *drv)
>  }
>  EXPORT_SYMBOL_GPL(driver_attach);
>  
> +void *vfio_pci_driver_ptr = (void *)0xdeadfeed;
> +EXPORT_SYMBOL(vfio_pci_driver_ptr);
> +
>  /*
>   * __device_release_driver() must be called with @dev lock held.
>   * When called for a USB interface, @dev->parent lock must be held as well.
> @@ -872,8 +875,13 @@ static void __device_release_driver(struct device *dev, struct device *parent)
>   * A concurrent invocation of the same function might
>   * have released the driver successfully while this one
>   * was waiting, so check for that.
> + * LP: #1792099
> + *
> + * Limit this to the vfio_pci_driver as some drivers NULL
> + * out this pointer in their remove() function.
> + * LP: #1803942
>   */
> - if (dev->driver != drv)
> + if (drv == vfio_pci_driver_ptr && dev->driver != drv)
>   return;
>  
>   device_links_driver_cleanup(dev);
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index f041b1a6cf66..6e70281715c4 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -1310,6 +1310,7 @@ static struct pci_driver vfio_pci_driver = {
>   .remove = vfio_pci_remove,
>   .err_handler = &vfio_err_handlers,
>  };
> +extern void *vfio_pci_driver_ptr;
>  
>  struct vfio_devices {
>   struct vfio_device **devices;
> @@ -1404,6 +1405,8 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
>  
>  static void __exit vfio_pci_cleanup(void)
>  {
> + vfio_pci_driver_ptr = (void *)0xdeadfeed;
> +
>   pci_unregister_driver(&vfio_pci_driver);
>   vfio_pci_uninit_perm_bits();
>  }
> @@ -1465,6 +1468,9 @@ static int __init vfio_pci_init(void)
>  
>   vfio_pci_fill_ids();
>  
> + /* Advertise my address. */
> + vfio_pci_driver_ptr = &vfio_pci_driver;
> +
>   return 0;
>  
>  out_driver:

Applied to bionic/master-next branch.

Thanks,
Kleber


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team