[PATCH 1/2][Zesty] powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
6 messages Options
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[PATCH 1/2][Zesty] powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD

Breno Leitao
From: Alistair Popple <[hidden email]>

BugLink: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1701272

NPU2 requires an extra explicit flush to an active GPU PID when
sending address translation shoot downs (ATSDs) to reliably flush the
GPU TLB. This patch adds just such a flush at the end of each sequence
of ATSDs.

We can safely use PID 0 which is always reserved and active on the
GPU. PID 0 is only used for init_mm which will never be a user mm on
the GPU. To enforce this we add a check in pnv_npu2_init_context()
just in case someone tries to use PID 0 on the GPU.

Signed-off-by: Alistair Popple <[hidden email]>
[mpe: Use true/false for bool literals]
Signed-off-by: Michael Ellerman <[hidden email]>
(cherry picked from commit bbd5ff50afffcf4a01d05367524736c57607a478)
Signed-off-by: Breno Leitao <[hidden email]>
---
 arch/powerpc/platforms/powernv/npu-dma.c | 94 ++++++++++++++++++++++----------
 1 file changed, 65 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 3184c9e265fb..9e042cd4aa03 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -448,7 +448,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
  return mmio_atsd_reg;
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
 {
  unsigned long launch;
 
@@ -464,12 +464,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
  /* PID */
  launch |= pid << PPC_BITLSHIFT(38);
 
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
+
  /* Invalidating the entire process doesn't use a va */
  return mmio_launch_invalidate(npu, launch, 0);
 }
 
 static int mmio_invalidate_va(struct npu *npu, unsigned long va,
- unsigned long pid)
+ unsigned long pid, bool flush)
 {
  unsigned long launch;
 
@@ -485,26 +488,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
  /* PID */
  launch |= pid << PPC_BITLSHIFT(38);
 
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
+
  return mmio_launch_invalidate(npu, launch, va);
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
+struct mmio_atsd_reg {
+ struct npu *npu;
+ int reg;
+};
+
+static void mmio_invalidate_wait(
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+ struct npu *npu;
+ int i, reg;
+
+ /* Wait for all invalidations to complete */
+ for (i = 0; i <= max_npu2_index; i++) {
+ if (mmio_atsd_reg[i].reg < 0)
+ continue;
+
+ /* Wait for completion */
+ npu = mmio_atsd_reg[i].npu;
+ reg = mmio_atsd_reg[i].reg;
+ while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+ cpu_relax();
+
+ put_mmio_atsd_reg(npu, reg);
+
+ /*
+ * The GPU requires two flush ATSDs to ensure all entries have
+ * been flushed. We use PID 0 as it will never be used for a
+ * process on the GPU.
+ */
+ if (flush)
+ mmio_invalidate_pid(npu, 0, true);
+ }
+}
+
 /*
  * Invalidate either a single address or an entire PID depending on
  * the value of va.
  */
 static void mmio_invalidate(struct npu_context *npu_context, int va,
- unsigned long address)
+ unsigned long address, bool flush)
 {
- int i, j, reg;
+ int i, j;
  struct npu *npu;
  struct pnv_phb *nphb;
  struct pci_dev *npdev;
- struct {
- struct npu *npu;
- int reg;
- } mmio_atsd_reg[NV_MAX_NPUS];
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
  unsigned long pid = npu_context->mm->context.id;
 
  /*
@@ -524,10 +561,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 
  if (va)
  mmio_atsd_reg[i].reg =
- mmio_invalidate_va(npu, address, pid);
+ mmio_invalidate_va(npu, address, pid,
+ flush);
  else
  mmio_atsd_reg[i].reg =
- mmio_invalidate_pid(npu, pid);
+ mmio_invalidate_pid(npu, pid, flush);
 
  /*
  * The NPU hardware forwards the shootdown to all GPUs
@@ -543,18 +581,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
  */
  flush_tlb_mm(npu_context->mm);
 
- /* Wait for all invalidations to complete */
- for (i = 0; i <= max_npu2_index; i++) {
- if (mmio_atsd_reg[i].reg < 0)
- continue;
-
- /* Wait for completion */
- npu = mmio_atsd_reg[i].npu;
- reg = mmio_atsd_reg[i].reg;
- while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
- cpu_relax();
- put_mmio_atsd_reg(npu, reg);
- }
+ mmio_invalidate_wait(mmio_atsd_reg, flush);
+ if (flush)
+ /* Wait for the flush to complete */
+ mmio_invalidate_wait(mmio_atsd_reg, false);
 }
 
 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -570,7 +600,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
  * There should be no more translation requests for this PID, but we
  * need to ensure any entries for it are removed from the TLB.
  */
- mmio_invalidate(npu_context, 0, 0);
+ mmio_invalidate(npu_context, 0, 0, true);
 }
 
 static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -580,7 +610,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
 {
  struct npu_context *npu_context = mn_to_npu_context(mn);
 
- mmio_invalidate(npu_context, 1, address);
+ mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -589,7 +619,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
 {
  struct npu_context *npu_context = mn_to_npu_context(mn);
 
- mmio_invalidate(npu_context, 1, address);
+ mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -599,8 +629,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
  struct npu_context *npu_context = mn_to_npu_context(mn);
  unsigned long address;
 
- for (address = start; address <= end; address += PAGE_SIZE)
- mmio_invalidate(npu_context, 1, address);
+ for (address = start; address < end; address += PAGE_SIZE)
+ mmio_invalidate(npu_context, 1, address, false);
+
+ /* Do the flush only on the final addess == end */
+ mmio_invalidate(npu_context, 1, address, true);
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -650,8 +683,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
  /* No nvlink associated with this GPU device */
  return ERR_PTR(-ENODEV);
 
- if (!mm) {
- /* kernel thread contexts are not supported */
+ if (!mm || mm->context.id == 0) {
+ /*
+ * Kernel thread contexts are not supported and context id 0 is
+ * reserved on the GPU.
+ */
  return ERR_PTR(-EINVAL);
  }
 
--
2.11.0


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[PATCH 2/2][Zesty] powerpc/npu-dma: Remove spurious WARN_ON when a PCI device has no of_node

Breno Leitao
From: Alistair Popple <[hidden email]>

BugLink: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1701272

Commit 4c3b89effc28 ("powerpc/powernv: Add sanity checks to
pnv_pci_get_{gpu|npu}_dev") introduced explicit warnings in
pnv_pci_get_npu_dev() when a PCIe device has no associated device-tree
node. However not all PCIe devices have an of_node and
pnv_pci_get_npu_dev() gets indirectly called at least once for every
PCIe device in the system. This results in spurious WARN_ON()'s so
remove it.

The same situation should not exist for pnv_pci_get_gpu_dev() as any
NPU based PCIe device requires a device-tree node.

Fixes: 4c3b89effc28 ("powerpc/powernv: Add sanity checks to pnv_pci_get_{gpu|npu}_dev")
Reported-by: Alexey Kardashevskiy <[hidden email]>
Signed-off-by: Alistair Popple <[hidden email]>
Signed-off-by: Michael Ellerman <[hidden email]>
(cherry picked from commit 377aa6b0efbaa29cfeecd8b9244641217f9544ca)
Signed-off-by: Breno Leitao <[hidden email]>
---
 arch/powerpc/platforms/powernv/npu-dma.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 9e042cd4aa03..fa804e1d1e75 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -75,7 +75,8 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
  if (WARN_ON(!gpdev))
  return NULL;
 
- if (WARN_ON(!gpdev->dev.of_node))
+ /* Not all PCI devices have device-tree nodes */
+ if (!gpdev->dev.of_node)
  return NULL;
 
  /* Get assoicated PCI device */
--
2.11.0


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

ACK / APPLIED[artful]: [PATCH 1/2][Zesty] powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD

Seth Forshee
In reply to this post by Breno Leitao
On Thu, Jun 29, 2017 at 01:40:12PM -0300, Breno Leitao wrote:

> From: Alistair Popple <[hidden email]>
>
> BugLink: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1701272
>
> NPU2 requires an extra explicit flush to an active GPU PID when
> sending address translation shoot downs (ATSDs) to reliably flush the
> GPU TLB. This patch adds just such a flush at the end of each sequence
> of ATSDs.
>
> We can safely use PID 0 which is always reserved and active on the
> GPU. PID 0 is only used for init_mm which will never be a user mm on
> the GPU. To enforce this we add a check in pnv_npu2_init_context()
> just in case someone tries to use PID 0 on the GPU.
>
> Signed-off-by: Alistair Popple <[hidden email]>
> [mpe: Use true/false for bool literals]
> Signed-off-by: Michael Ellerman <[hidden email]>
> (cherry picked from commit bbd5ff50afffcf4a01d05367524736c57607a478)
> Signed-off-by: Breno Leitao <[hidden email]>

Clean cherry picks, scope is limited to powerpc. For both patches:

Acked-by: Seth Forshee <[hidden email]>

Applied to artful/master-next, thanks.

--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

ACK/cmnt : [PATCH 1/2][Zesty] powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD

Stefan Bader-2
In reply to this post by Breno Leitao
On 29.06.2017 18:40, Breno Leitao wrote:

> From: Alistair Popple <[hidden email]>
>
> BugLink: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1701272
>
> NPU2 requires an extra explicit flush to an active GPU PID when
> sending address translation shoot downs (ATSDs) to reliably flush the
> GPU TLB. This patch adds just such a flush at the end of each sequence
> of ATSDs.
>
> We can safely use PID 0 which is always reserved and active on the
> GPU. PID 0 is only used for init_mm which will never be a user mm on
> the GPU. To enforce this we add a check in pnv_npu2_init_context()
> just in case someone tries to use PID 0 on the GPU.
>
> Signed-off-by: Alistair Popple <[hidden email]>
> [mpe: Use true/false for bool literals]
> Signed-off-by: Michael Ellerman <[hidden email]>
> (cherry picked from commit bbd5ff50afffcf4a01d05367524736c57607a478)
> Signed-off-by: Breno Leitao <[hidden email]>
Acked-by: Stefan Bader <[hidden email]>

> ---

Two rather minor nitpicks: Cannot remember exactly what it was but vaguely think
that when referring to bugs the form https://bugs.launchpad.net/bugs/<bugnr> is
preferred as more general. Other forms refer to specific projects/tasks. And it
is not like the linux task would become invalid anytime soon, so just a note.
The other thing, when submitting more than one patch it would be good to add a
cover email to that. For one to give some quick info about the set, and second
it gives the lazy reviewer an anchor to ack the set. ;)

That said this ACK should be for the 2nd patch as well.

-Stefan

>  arch/powerpc/platforms/powernv/npu-dma.c | 94 ++++++++++++++++++++++----------
>  1 file changed, 65 insertions(+), 29 deletions(-)
>
> diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
> index 3184c9e265fb..9e042cd4aa03 100644
> --- a/arch/powerpc/platforms/powernv/npu-dma.c
> +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> @@ -448,7 +448,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
>   return mmio_atsd_reg;
>  }
>  
> -static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
> +static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
>  {
>   unsigned long launch;
>  
> @@ -464,12 +464,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
>   /* PID */
>   launch |= pid << PPC_BITLSHIFT(38);
>  
> + /* No flush */
> + launch |= !flush << PPC_BITLSHIFT(39);
> +
>   /* Invalidating the entire process doesn't use a va */
>   return mmio_launch_invalidate(npu, launch, 0);
>  }
>  
>  static int mmio_invalidate_va(struct npu *npu, unsigned long va,
> - unsigned long pid)
> + unsigned long pid, bool flush)
>  {
>   unsigned long launch;
>  
> @@ -485,26 +488,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
>   /* PID */
>   launch |= pid << PPC_BITLSHIFT(38);
>  
> + /* No flush */
> + launch |= !flush << PPC_BITLSHIFT(39);
> +
>   return mmio_launch_invalidate(npu, launch, va);
>  }
>  
>  #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
>  
> +struct mmio_atsd_reg {
> + struct npu *npu;
> + int reg;
> +};
> +
> +static void mmio_invalidate_wait(
> + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
> +{
> + struct npu *npu;
> + int i, reg;
> +
> + /* Wait for all invalidations to complete */
> + for (i = 0; i <= max_npu2_index; i++) {
> + if (mmio_atsd_reg[i].reg < 0)
> + continue;
> +
> + /* Wait for completion */
> + npu = mmio_atsd_reg[i].npu;
> + reg = mmio_atsd_reg[i].reg;
> + while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
> + cpu_relax();
> +
> + put_mmio_atsd_reg(npu, reg);
> +
> + /*
> + * The GPU requires two flush ATSDs to ensure all entries have
> + * been flushed. We use PID 0 as it will never be used for a
> + * process on the GPU.
> + */
> + if (flush)
> + mmio_invalidate_pid(npu, 0, true);
> + }
> +}
> +
>  /*
>   * Invalidate either a single address or an entire PID depending on
>   * the value of va.
>   */
>  static void mmio_invalidate(struct npu_context *npu_context, int va,
> - unsigned long address)
> + unsigned long address, bool flush)
>  {
> - int i, j, reg;
> + int i, j;
>   struct npu *npu;
>   struct pnv_phb *nphb;
>   struct pci_dev *npdev;
> - struct {
> - struct npu *npu;
> - int reg;
> - } mmio_atsd_reg[NV_MAX_NPUS];
> + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
>   unsigned long pid = npu_context->mm->context.id;
>  
>   /*
> @@ -524,10 +561,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
>  
>   if (va)
>   mmio_atsd_reg[i].reg =
> - mmio_invalidate_va(npu, address, pid);
> + mmio_invalidate_va(npu, address, pid,
> + flush);
>   else
>   mmio_atsd_reg[i].reg =
> - mmio_invalidate_pid(npu, pid);
> + mmio_invalidate_pid(npu, pid, flush);
>  
>   /*
>   * The NPU hardware forwards the shootdown to all GPUs
> @@ -543,18 +581,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
>   */
>   flush_tlb_mm(npu_context->mm);
>  
> - /* Wait for all invalidations to complete */
> - for (i = 0; i <= max_npu2_index; i++) {
> - if (mmio_atsd_reg[i].reg < 0)
> - continue;
> -
> - /* Wait for completion */
> - npu = mmio_atsd_reg[i].npu;
> - reg = mmio_atsd_reg[i].reg;
> - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
> - cpu_relax();
> - put_mmio_atsd_reg(npu, reg);
> - }
> + mmio_invalidate_wait(mmio_atsd_reg, flush);
> + if (flush)
> + /* Wait for the flush to complete */
> + mmio_invalidate_wait(mmio_atsd_reg, false);
>  }
>  
>  static void pnv_npu2_mn_release(struct mmu_notifier *mn,
> @@ -570,7 +600,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
>   * There should be no more translation requests for this PID, but we
>   * need to ensure any entries for it are removed from the TLB.
>   */
> - mmio_invalidate(npu_context, 0, 0);
> + mmio_invalidate(npu_context, 0, 0, true);
>  }
>  
>  static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
> @@ -580,7 +610,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
>  {
>   struct npu_context *npu_context = mn_to_npu_context(mn);
>  
> - mmio_invalidate(npu_context, 1, address);
> + mmio_invalidate(npu_context, 1, address, true);
>  }
>  
>  static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
> @@ -589,7 +619,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
>  {
>   struct npu_context *npu_context = mn_to_npu_context(mn);
>  
> - mmio_invalidate(npu_context, 1, address);
> + mmio_invalidate(npu_context, 1, address, true);
>  }
>  
>  static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
> @@ -599,8 +629,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
>   struct npu_context *npu_context = mn_to_npu_context(mn);
>   unsigned long address;
>  
> - for (address = start; address <= end; address += PAGE_SIZE)
> - mmio_invalidate(npu_context, 1, address);
> + for (address = start; address < end; address += PAGE_SIZE)
> + mmio_invalidate(npu_context, 1, address, false);
> +
> + /* Do the flush only on the final addess == end */
> + mmio_invalidate(npu_context, 1, address, true);
>  }
>  
>  static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
> @@ -650,8 +683,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
>   /* No nvlink associated with this GPU device */
>   return ERR_PTR(-ENODEV);
>  
> - if (!mm) {
> - /* kernel thread contexts are not supported */
> + if (!mm || mm->context.id == 0) {
> + /*
> + * Kernel thread contexts are not supported and context id 0 is
> + * reserved on the GPU.
> + */
>   return ERR_PTR(-EINVAL);
>   }
>  
>


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team

signature.asc (836 bytes) Download Attachment
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: ACK/cmnt : [PATCH 1/2][Zesty] powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD

Thadeu Lima de Souza Cascardo-3
On Mon, Jul 10, 2017 at 02:42:43PM +0200, Stefan Bader wrote:

> On 29.06.2017 18:40, Breno Leitao wrote:
> > From: Alistair Popple <[hidden email]>
> >
> > BugLink: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1701272
> >
> > NPU2 requires an extra explicit flush to an active GPU PID when
> > sending address translation shoot downs (ATSDs) to reliably flush the
> > GPU TLB. This patch adds just such a flush at the end of each sequence
> > of ATSDs.
> >
> > We can safely use PID 0 which is always reserved and active on the
> > GPU. PID 0 is only used for init_mm which will never be a user mm on
> > the GPU. To enforce this we add a check in pnv_npu2_init_context()
> > just in case someone tries to use PID 0 on the GPU.
> >
> > Signed-off-by: Alistair Popple <[hidden email]>
> > [mpe: Use true/false for bool literals]
> > Signed-off-by: Michael Ellerman <[hidden email]>
> > (cherry picked from commit bbd5ff50afffcf4a01d05367524736c57607a478)
> > Signed-off-by: Breno Leitao <[hidden email]>
> Acked-by: Stefan Bader <[hidden email]>
>
> > ---
>
> Two rather minor nitpicks: Cannot remember exactly what it was but vaguely think
> that when referring to bugs the form https://bugs.launchpad.net/bugs/<bugnr> is
> preferred as more general. Other forms refer to specific projects/tasks. And it
> is not like the linux task would become invalid anytime soon, so just a note.
> The other thing, when submitting more than one patch it would be good to add a
> cover email to that. For one to give some quick info about the set, and second
> it gives the lazy reviewer an anchor to ack the set. ;)

Also, some tools some of us use care about the ACKs sent to cover
letters, making it very helpful if you had sent one.

Cascardo.

>
> That said this ACK should be for the 2nd patch as well.
>
> -Stefan
> >  arch/powerpc/platforms/powernv/npu-dma.c | 94 ++++++++++++++++++++++----------
> >  1 file changed, 65 insertions(+), 29 deletions(-)
> >
> > diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
> > index 3184c9e265fb..9e042cd4aa03 100644
> > --- a/arch/powerpc/platforms/powernv/npu-dma.c
> > +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> > @@ -448,7 +448,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
> >   return mmio_atsd_reg;
> >  }
> >  
> > -static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
> > +static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
> >  {
> >   unsigned long launch;
> >  
> > @@ -464,12 +464,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
> >   /* PID */
> >   launch |= pid << PPC_BITLSHIFT(38);
> >  
> > + /* No flush */
> > + launch |= !flush << PPC_BITLSHIFT(39);
> > +
> >   /* Invalidating the entire process doesn't use a va */
> >   return mmio_launch_invalidate(npu, launch, 0);
> >  }
> >  
> >  static int mmio_invalidate_va(struct npu *npu, unsigned long va,
> > - unsigned long pid)
> > + unsigned long pid, bool flush)
> >  {
> >   unsigned long launch;
> >  
> > @@ -485,26 +488,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
> >   /* PID */
> >   launch |= pid << PPC_BITLSHIFT(38);
> >  
> > + /* No flush */
> > + launch |= !flush << PPC_BITLSHIFT(39);
> > +
> >   return mmio_launch_invalidate(npu, launch, va);
> >  }
> >  
> >  #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
> >  
> > +struct mmio_atsd_reg {
> > + struct npu *npu;
> > + int reg;
> > +};
> > +
> > +static void mmio_invalidate_wait(
> > + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
> > +{
> > + struct npu *npu;
> > + int i, reg;
> > +
> > + /* Wait for all invalidations to complete */
> > + for (i = 0; i <= max_npu2_index; i++) {
> > + if (mmio_atsd_reg[i].reg < 0)
> > + continue;
> > +
> > + /* Wait for completion */
> > + npu = mmio_atsd_reg[i].npu;
> > + reg = mmio_atsd_reg[i].reg;
> > + while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
> > + cpu_relax();
> > +
> > + put_mmio_atsd_reg(npu, reg);
> > +
> > + /*
> > + * The GPU requires two flush ATSDs to ensure all entries have
> > + * been flushed. We use PID 0 as it will never be used for a
> > + * process on the GPU.
> > + */
> > + if (flush)
> > + mmio_invalidate_pid(npu, 0, true);
> > + }
> > +}
> > +
> >  /*
> >   * Invalidate either a single address or an entire PID depending on
> >   * the value of va.
> >   */
> >  static void mmio_invalidate(struct npu_context *npu_context, int va,
> > - unsigned long address)
> > + unsigned long address, bool flush)
> >  {
> > - int i, j, reg;
> > + int i, j;
> >   struct npu *npu;
> >   struct pnv_phb *nphb;
> >   struct pci_dev *npdev;
> > - struct {
> > - struct npu *npu;
> > - int reg;
> > - } mmio_atsd_reg[NV_MAX_NPUS];
> > + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
> >   unsigned long pid = npu_context->mm->context.id;
> >  
> >   /*
> > @@ -524,10 +561,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
> >  
> >   if (va)
> >   mmio_atsd_reg[i].reg =
> > - mmio_invalidate_va(npu, address, pid);
> > + mmio_invalidate_va(npu, address, pid,
> > + flush);
> >   else
> >   mmio_atsd_reg[i].reg =
> > - mmio_invalidate_pid(npu, pid);
> > + mmio_invalidate_pid(npu, pid, flush);
> >  
> >   /*
> >   * The NPU hardware forwards the shootdown to all GPUs
> > @@ -543,18 +581,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
> >   */
> >   flush_tlb_mm(npu_context->mm);
> >  
> > - /* Wait for all invalidations to complete */
> > - for (i = 0; i <= max_npu2_index; i++) {
> > - if (mmio_atsd_reg[i].reg < 0)
> > - continue;
> > -
> > - /* Wait for completion */
> > - npu = mmio_atsd_reg[i].npu;
> > - reg = mmio_atsd_reg[i].reg;
> > - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
> > - cpu_relax();
> > - put_mmio_atsd_reg(npu, reg);
> > - }
> > + mmio_invalidate_wait(mmio_atsd_reg, flush);
> > + if (flush)
> > + /* Wait for the flush to complete */
> > + mmio_invalidate_wait(mmio_atsd_reg, false);
> >  }
> >  
> >  static void pnv_npu2_mn_release(struct mmu_notifier *mn,
> > @@ -570,7 +600,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
> >   * There should be no more translation requests for this PID, but we
> >   * need to ensure any entries for it are removed from the TLB.
> >   */
> > - mmio_invalidate(npu_context, 0, 0);
> > + mmio_invalidate(npu_context, 0, 0, true);
> >  }
> >  
> >  static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
> > @@ -580,7 +610,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
> >  {
> >   struct npu_context *npu_context = mn_to_npu_context(mn);
> >  
> > - mmio_invalidate(npu_context, 1, address);
> > + mmio_invalidate(npu_context, 1, address, true);
> >  }
> >  
> >  static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
> > @@ -589,7 +619,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
> >  {
> >   struct npu_context *npu_context = mn_to_npu_context(mn);
> >  
> > - mmio_invalidate(npu_context, 1, address);
> > + mmio_invalidate(npu_context, 1, address, true);
> >  }
> >  
> >  static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
> > @@ -599,8 +629,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
> >   struct npu_context *npu_context = mn_to_npu_context(mn);
> >   unsigned long address;
> >  
> > - for (address = start; address <= end; address += PAGE_SIZE)
> > - mmio_invalidate(npu_context, 1, address);
> > + for (address = start; address < end; address += PAGE_SIZE)
> > + mmio_invalidate(npu_context, 1, address, false);
> > +
> > + /* Do the flush only on the final addess == end */
> > + mmio_invalidate(npu_context, 1, address, true);
> >  }
> >  
> >  static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
> > @@ -650,8 +683,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
> >   /* No nvlink associated with this GPU device */
> >   return ERR_PTR(-ENODEV);
> >  
> > - if (!mm) {
> > - /* kernel thread contexts are not supported */
> > + if (!mm || mm->context.id == 0) {
> > + /*
> > + * Kernel thread contexts are not supported and context id 0 is
> > + * reserved on the GPU.
> > + */
> >   return ERR_PTR(-EINVAL);
> >   }
> >  
> >
>
>




> --
> kernel-team mailing list
> [hidden email]
> https://lists.ubuntu.com/mailman/listinfo/kernel-team


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

APPLIED: [PATCH 1/2][Zesty] powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD

Thadeu Lima de Souza Cascardo-3
In reply to this post by Breno Leitao
Both applied to zesty master-next branch.

Thanks.
Cascardo.

--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Loading...