[PATCH][SRU][Xenial] i40e: use valid online CPU on q_vector initialization

Guilherme G. Piccoli
BugLink: https://bugs.launchpad.net/bugs/1703663

Currently, the q_vector initialization routine sets the affinity_mask
of a q_vector based on the v_idx value: a loop iterates over v_idx,
which is a simple incremental counter, and the cpumask is created from
that value.

This is a problem in systems with multiple logical CPUs per core (as in
SMT scenarios). If we disable some logical CPUs, by turning SMT off for
example, we end up with a sparse cpu_online_mask, i.e., only the first
CPU in each core is online, and incrementally filling the q_vector
cpumask can lead to multiple offline CPUs being assigned to q_vectors.

Example: in a system with 8 cores, each containing 8 logical CPUs
(SMT == 8 in this case), we have 64 CPUs in total. But if SMT is
disabled, only the 1st CPU in each core remains online, so the
cpu_online_mask in this case would have only 8 bits set, in a sparse way.

In the general case, when SMT is off the cpu_online_mask has only C bits set:
0, 1*N, 2*N, ..., (C-1)*N  where
C == # of cores;
N == # of logical CPUs per core.
In our example, only bits 0, 8, 16, 24, 32, 40, 48, 56 would be set.
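
To make the sparsity concrete, here is a minimal userspace sketch (not
kernel code; it assumes sibling threads are numbered consecutively
within each core, as in the example above) that prints which CPU ids
remain online:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        const int cores = 8, threads_per_core = 8;  /* the 64-CPU example */
        uint64_t online = 0;
        int c, cpu;

        /* SMT off: only the first thread of each core stays online */
        for (c = 0; c < cores; c++)
                online |= 1ULL << (c * threads_per_core);  /* bits 0, N, ..., (C-1)*N */

        for (cpu = 0; cpu < cores * threads_per_core; cpu++)
                if (online & (1ULL << cpu))
                        printf("CPU %2d online\n", cpu);   /* 0, 8, 16, ..., 56 */
        return 0;
}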

This patch changes the way a q_vector's affinity_mask is created: the
loop still iterates over v_idx, but the CPU set in the mask is consumed
from cpu_online_mask instead of just being the incremental v_idx value.

No functional changes were introduced.

Signed-off-by: Guilherme G Piccoli <[hidden email]>
Tested-by: Andrew Bowers <[hidden email]>
Signed-off-by: Jeff Kirsher <[hidden email]>
(cherry picked from commit 7f6c553902bfa1c4e3f6cfa955c5ea036c7fe8e4)
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 8f3b53e0dc46..f2ffa7ef74fd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7694,10 +7694,11 @@ static int i40e_init_msix(struct i40e_pf *pf)
  * i40e_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
  * @vsi: the VSI being configured
  * @v_idx: index of the vector in the vsi struct
+ * @cpu: cpu to be used on affinity_mask
  *
  * We allocate one q_vector.  If allocation fails we return -ENOMEM.
  **/
-static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
+static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
 {
  struct i40e_q_vector *q_vector;
 
@@ -7708,7 +7709,8 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
 
  q_vector->vsi = vsi;
  q_vector->v_idx = v_idx;
- cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+ cpumask_set_cpu(cpu, &q_vector->affinity_mask);
+
  if (vsi->netdev)
  netif_napi_add(vsi->netdev, &q_vector->napi,
        i40e_napi_poll, NAPI_POLL_WEIGHT);
@@ -7732,8 +7734,7 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
 static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
 {
  struct i40e_pf *pf = vsi->back;
- int v_idx, num_q_vectors;
- int err;
+ int err, v_idx, num_q_vectors, current_cpu;
 
  /* if not MSIX, give the one vector only to the LAN VSI */
  if (pf->flags & I40E_FLAG_MSIX_ENABLED)
@@ -7743,10 +7744,15 @@ static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
  else
  return -EINVAL;
 
+ current_cpu = cpumask_first(cpu_online_mask);
+
  for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
- err = i40e_vsi_alloc_q_vector(vsi, v_idx);
+ err = i40e_vsi_alloc_q_vector(vsi, v_idx, current_cpu);
  if (err)
  goto err_out;
+ current_cpu = cpumask_next(current_cpu, cpu_online_mask);
+ if (unlikely(current_cpu >= nr_cpu_ids))
+ current_cpu = cpumask_first(cpu_online_mask);
  }
 
  return 0;
--
2.13.0
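
As a side note for reviewers, the round-robin walk the patch introduces
can be traced outside the kernel with a small userspace analogue. This
is a sketch under stated assumptions, not driver code: the helpers
find_first_online()/find_next_online() are hypothetical stand-ins for
cpumask_first()/cpumask_next(), which return a value >= nr_cpu_ids once
the mask is exhausted.

#include <stdio.h>
#include <stdint.h>

#define NR_CPUS 64

/* Stand-in for cpumask_next(): next set bit strictly after 'cpu',
 * or NR_CPUS when none remains. */
static int find_next_online(uint64_t mask, int cpu)
{
        int i;

        for (i = cpu + 1; i < NR_CPUS; i++)
                if (mask & (1ULL << i))
                        return i;
        return NR_CPUS;
}

/* Stand-in for cpumask_first(). */
static int find_first_online(uint64_t mask)
{
        return find_next_online(mask, -1);
}

int main(void)
{
        uint64_t online = 0;
        const int num_q_vectors = 12;   /* more vectors than online CPUs */
        int c, v_idx, cpu;

        for (c = 0; c < 8; c++)
                online |= 1ULL << (c * 8);      /* sparse mask: 0, 8, ..., 56 */

        cpu = find_first_online(online);
        for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
                printf("q_vector %2d -> CPU %2d\n", v_idx, cpu);
                cpu = find_next_online(online, cpu);
                if (cpu >= NR_CPUS)             /* wrap, as the patch does */
                        cpu = find_first_online(online);
        }
        return 0;
}

With 12 vectors and only 8 online CPUs, vectors 8-11 wrap back to CPUs
0, 8, 16 and 24, which is exactly the behaviour of the
cpumask_first()/cpumask_next() loop in the hunk above.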


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team

ACK: [PATCH][SRU][Xenial] i40e: use valid online CPU on q_vector initialization

Kleber Souza
On 07/11/17 20:58, Guilherme G. Piccoli wrote:

> BugLink: https://bugs.launchpad.net/bugs/1703663
>
> [...]

Clean cherry-pick, good SRU justification, no functional changes and
limited to a single driver.

Acked-by: Kleber Sacilotto de Souza <[hidden email]>


ACK: [PATCH][SRU][Xenial] i40e: use valid online CPU on q_vector initialization

Stefan Bader
On 11.07.2017 20:58, Guilherme G. Piccoli wrote:

> BugLink: https://bugs.launchpad.net/bugs/1703663
>
> [...]
>
> Signed-off-by: Guilherme G Piccoli <[hidden email]>
> Tested-by: Andrew Bowers <[hidden email]>
> Signed-off-by: Jeff Kirsher <[hidden email]>

> (cherry picked from commit 7f6c553902bfa1c4e3f6cfa955c5ea036c7fe8e4)
Signed-off-by: Guilherme G Piccoli <[hidden email]>
Acked-by: Stefan Bader <[hidden email]>




APPLIED: [PATCH][SRU][Xenial] i40e: use valid online CPU on q_vector initialization

Thadeu Lima de Souza Cascardo
On Wed, Jul 12, 2017 at 11:11:06AM +0200, Stefan Bader wrote:

> On 11.07.2017 20:58, Guilherme G. Piccoli wrote:
> > BugLink: https://bugs.launchpad.net/bugs/1703663
> >
> > [...]
> >
> > Signed-off-by: Guilherme G Piccoli <[hidden email]>
> > Tested-by: Andrew Bowers <[hidden email]>
> > Signed-off-by: Jeff Kirsher <[hidden email]>
>
> > (cherry picked from commit 7f6c553902bfa1c4e3f6cfa955c5ea036c7fe8e4)
> Signed-off-by: Guilherme G Piccoli <[hidden email]>
> Acked-by: Stefan Bader <[hidden email]>

His Sign-off is already there.

Applied to xenial master-next branch.

Thanks.
Cascardo.
