[azure][PATCH v1 0/9] [Hyper-V] Transparent SR-IOV solves bonding race conditions

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
11 messages Options
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[azure][PATCH v1 0/9] [Hyper-V] Transparent SR-IOV solves bonding race conditions

Marcelo Henrique Cerri
BugLink: http://bugs.launchpad.net/bugs/1708469

Transparent SR-IOV support for Azure.

These are clean cherry-picks from mainline and net-next, with the exception of
the commit that removes the bonding script. Since the script is being removed,
it doesn't make much sense to pick additional changes just to get a clean
pick for that.

Besides that, some changes that were previously carried as SAUCE patches are
already covered by the commits picked from net-next. Because of that, I simply
reverted those SAUCE commits.

Haiyang Zhang (1):
  hv_netvsc: Fix the carrier state error when data path is off

Marcelo Henrique Cerri (3):
  Revert "UBUNTU: SAUCE: netvsc: keep track of vf passthrough
    statistics"
  Revert "UBUNTU: SAUCE: netvsc: optional transparent fail over"
  Revert "UBUNTU: SAUCE: netvsc: optimize calculation of number of
    slots"

stephen hemminger (5):
  UBUNTU: SAUCE: netvsc: add some rtnl_dereference annotations
  UBUNTU: SAUCE: netvsc: include rtnetlink.h
  UBUNTU: SAUCE: netvsc: transparent VF management
  UBUNTU: SAUCE: netvsc: add documentation
  netvsc: remove bonding setup script

 Documentation/networking/netvsc.txt |  63 +++++++
 MAINTAINERS                         |   1 +
 drivers/net/hyperv/hyperv_net.h     |  14 +-
 drivers/net/hyperv/netvsc.c         |   7 +-
 drivers/net/hyperv/netvsc_drv.c     | 343 ++++++++++++++++++++----------------
 drivers/net/hyperv/rndis_filter.c   |   1 +
 tools/hv/bondvf.sh                  | 193 --------------------
 7 files changed, 272 insertions(+), 350 deletions(-)
 create mode 100644 Documentation/networking/netvsc.txt
 delete mode 100755 tools/hv/bondvf.sh

--
2.7.4


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[azure][PATCH v1 1/9] Revert "UBUNTU: SAUCE: netvsc: keep track of vf passthrough statistics"

Marcelo Henrique Cerri
BugLink: http://bugs.launchpad.net/bugs/1708469

This reverts commit 350771e4b20a888e9d5d857f46497ae08e8cf8bb.

That commit was replaced by net-next commit 0c195567a8f6 ("netvsc:
transparent VF management").

Signed-off-by: Marcelo Henrique Cerri <[hidden email]>
---
 drivers/net/hyperv/hyperv_net.h |   1 -
 drivers/net/hyperv/netvsc_drv.c | 127 +++++++---------------------------------
 2 files changed, 20 insertions(+), 108 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index c75188297ffd..d20d794d396f 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -718,7 +718,6 @@ struct net_device_context {
  /* State to manage the associated VF interface. */
  struct net_device __rcu *vf_netdev;
  struct work_struct vf_takeover;
- struct pcpu_sw_netstats __percpu *vf_stats;
 
  /* 1: allocated, serial number is valid. 0: not allocated */
  u32 vf_alloc;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index cefd7d895397..f93106e69d80 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -374,16 +374,8 @@ static u32 net_checksum_info(struct sk_buff *skb)
 static int netvsc_vf_xmit(struct net_device *net, struct net_device *vf_netdev,
   struct sk_buff *skb)
 {
- struct net_device_context *ndev_ctx = netdev_priv(net);
- struct pcpu_sw_netstats *pcpu_stats
- = this_cpu_ptr(ndev_ctx->vf_stats);
-
  skb->dev = vf_netdev;
-
- u64_stats_update_begin(&pcpu_stats->syncp);
- pcpu_stats->tx_packets++;
- pcpu_stats->tx_bytes += skb->len;
- u64_stats_update_end(&pcpu_stats->syncp);
+ /* TODO stats */
 
  return dev_queue_xmit(skb);
 }
@@ -946,35 +938,6 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
  return ret;
 }
 
-static void netvsc_get_vf_stats(struct net_device *net,
- struct pcpu_sw_netstats *tot)
-{
- struct net_device_context *ndev_ctx = netdev_priv(net);
- int i;
-
- memset(tot, 0, sizeof(*tot));
-
- for_each_possible_cpu(i) {
- const struct pcpu_sw_netstats *stats
- = per_cpu_ptr(ndev_ctx->vf_stats, i);
- u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
- unsigned int start;
-
- do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- rx_packets = stats->rx_packets;
- tx_packets = stats->tx_packets;
- rx_bytes = stats->rx_bytes;
- tx_bytes = stats->tx_bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
-
- tot->rx_packets += rx_packets;
- tot->tx_packets += tx_packets;
- tot->rx_bytes   += rx_bytes;
- tot->tx_bytes   += tx_bytes;
- }
-}
-
 static void netvsc_get_stats64(struct net_device *net,
        struct rtnl_link_stats64 *t)
 {
@@ -985,19 +948,6 @@ static void netvsc_get_stats64(struct net_device *net,
  if (!nvdev)
  return;
 
- netdev_stats_to_stats64(t, &net->stats);
-
- if (transparent_vf) {
- struct pcpu_sw_netstats vf_tot;
-
- netvsc_get_vf_stats(net, &vf_tot);
-
- t->rx_packets += vf_tot.rx_packets;
- t->tx_packets += vf_tot.tx_packets;
- t->rx_bytes   += vf_tot.rx_bytes;
- t->tx_bytes   += vf_tot.tx_bytes;
- }
-
  for (i = 0; i < nvdev->num_chn; i++) {
  const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
  const struct netvsc_stats *stats;
@@ -1026,6 +976,12 @@ static void netvsc_get_stats64(struct net_device *net,
  t->rx_packets += packets;
  t->multicast += multicast;
  }
+
+ t->tx_dropped = net->stats.tx_dropped;
+ t->tx_errors = net->stats.tx_errors;
+
+ t->rx_dropped = net->stats.rx_dropped;
+ t->rx_errors = net->stats.rx_errors;
 }
 
 static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
@@ -1061,15 +1017,9 @@ static const struct {
  { "tx_no_space",  offsetof(struct netvsc_ethtool_stats, tx_no_space) },
  { "tx_too_big",  offsetof(struct netvsc_ethtool_stats, tx_too_big) },
  { "tx_busy",  offsetof(struct netvsc_ethtool_stats, tx_busy) },
-}, vf_stats[] = {
- { "vf_rx_packets", offsetof(struct pcpu_sw_netstats, rx_packets) },
- { "vf_rx_bytes",   offsetof(struct pcpu_sw_netstats, rx_bytes) },
- { "vf_tx_packets", offsetof(struct pcpu_sw_netstats, tx_packets) },
- { "vf_tx_bytes",   offsetof(struct pcpu_sw_netstats, tx_bytes) },
 };
 
 #define NETVSC_GLOBAL_STATS_LEN ARRAY_SIZE(netvsc_stats)
-#define NETVSC_VF_STATS_LEN ARRAY_SIZE(vf_stats)
 
 /* 4 statistics per queue (rx/tx packets/bytes) */
 #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
@@ -1084,9 +1034,7 @@ static int netvsc_get_sset_count(struct net_device *dev, int string_set)
 
  switch (string_set) {
  case ETH_SS_STATS:
- return NETVSC_GLOBAL_STATS_LEN
- + (transparent_vf ? NETVSC_VF_STATS_LEN : 0)
- + NETVSC_QUEUE_STATS_LEN(nvdev);
+ return NETVSC_GLOBAL_STATS_LEN + NETVSC_QUEUE_STATS_LEN(nvdev);
  default:
  return -EINVAL;
  }
@@ -1109,14 +1057,6 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
  for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++)
  data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset);
 
- if (transparent_vf) {
- struct pcpu_sw_netstats sum;
-
- netvsc_get_vf_stats(dev, &sum);
- for (j = 0; j < NETVSC_VF_STATS_LEN; j++)
- data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);
- }
-
  for (j = 0; j < nvdev->num_chn; j++) {
  qstats = &nvdev->chan_table[j].tx_stats;
 
@@ -1151,18 +1091,11 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 
  switch (stringset) {
  case ETH_SS_STATS:
- for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) {
- memcpy(p, netvsc_stats[i].name, ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- }
-
- if (transparent_vf) {
- for (i = 0; i < ARRAY_SIZE(vf_stats); i++) {
- memcpy(p, vf_stats[i].name, ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- }
- }
+ for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
+ memcpy(p + i * ETH_GSTRING_LEN,
+       netvsc_stats[i].name, ETH_GSTRING_LEN);
 
+ p += i * ETH_GSTRING_LEN;
  for (i = 0; i < nvdev->num_chn; i++) {
  sprintf(p, "tx_queue_%u_packets", i);
  p += ETH_GSTRING_LEN;
@@ -1495,17 +1428,10 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
 {
  struct sk_buff *skb = *pskb;
  struct net_device *ndev = rcu_dereference(skb->dev->rx_handler_data);
- struct net_device_context *ndev_ctx = netdev_priv(ndev);
- struct pcpu_sw_netstats *pcpu_stats
- = this_cpu_ptr(ndev_ctx->vf_stats);
 
  skb->dev = ndev;
 
- u64_stats_update_begin(&pcpu_stats->syncp);
- pcpu_stats->rx_packets++;
- pcpu_stats->rx_bytes += skb->len;
- u64_stats_update_end(&pcpu_stats->syncp);
-
+ /* TODO: stats */
  return RX_HANDLER_ANOTHER;
 }
 
@@ -1720,12 +1646,12 @@ static int netvsc_probe(struct hv_device *dev,
  struct net_device_context *net_device_ctx;
  struct netvsc_device_info device_info;
  struct netvsc_device *nvdev;
- int ret = -ENOMEM;
+ int ret;
 
  net = alloc_etherdev_mq(sizeof(struct net_device_context),
  VRSS_CHANNEL_MAX);
  if (!net)
- goto no_net;
+ return -ENOMEM;
 
  netif_carrier_off(net);
 
@@ -1745,10 +1671,6 @@ static int netvsc_probe(struct hv_device *dev,
  spin_lock_init(&net_device_ctx->lock);
  INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
  INIT_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
- net_device_ctx->vf_stats
- = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!net_device_ctx->vf_stats)
- goto no_stats;
 
  net->netdev_ops = &device_ops;
  net->ethtool_ops = &ethtool_ops;
@@ -1768,9 +1690,10 @@ static int netvsc_probe(struct hv_device *dev,
  if (IS_ERR(nvdev)) {
  ret = PTR_ERR(nvdev);
  netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
- goto rndis_failed;
+ free_netdev(net);
+ hv_set_drvdata(dev, NULL);
+ return ret;
  }
-
  memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
 
  /* hw_features computed in rndis_filter_device_add */
@@ -1794,20 +1717,11 @@ static int netvsc_probe(struct hv_device *dev,
  ret = register_netdev(net);
  if (ret != 0) {
  pr_err("Unable to register netdev.\n");
- goto register_failed;
+ rndis_filter_device_remove(dev, nvdev);
+ free_netdev(net);
  }
 
  return ret;
-
-register_failed:
- rndis_filter_device_remove(dev, nvdev);
-rndis_failed:
- free_percpu(net_device_ctx->vf_stats);
-no_stats:
- hv_set_drvdata(dev, NULL);
- free_netdev(net);
-no_net:
- return ret;
 }
 
 static int netvsc_remove(struct hv_device *dev)
@@ -1840,7 +1754,6 @@ static int netvsc_remove(struct hv_device *dev)
 
  hv_set_drvdata(dev, NULL);
 
- free_percpu(ndev_ctx->vf_stats);
  free_netdev(net);
  return 0;
 }
--
2.7.4


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[azure][PATCH v1 2/9] Revert "UBUNTU: SAUCE: netvsc: optional transparent fail over"

Marcelo Henrique Cerri
In reply to this post by Marcelo Henrique Cerri
BugLink: http://bugs.launchpad.net/bugs/1708469

This reverts commit cc6cf50b614f58110ae4857ca5287e675b1971d9.

That commit was replaced by net-next commit 0c195567a8f6 ("netvsc:
transparent VF management").

Signed-off-by: Marcelo Henrique Cerri <[hidden email]>
---
 drivers/net/hyperv/hyperv_net.h |   1 -
 drivers/net/hyperv/netvsc_drv.c | 185 +++++-----------------------------------
 2 files changed, 19 insertions(+), 167 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index d20d794d396f..880572765b5d 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -717,7 +717,6 @@ struct net_device_context {
 
  /* State to manage the associated VF interface. */
  struct net_device __rcu *vf_netdev;
- struct work_struct vf_takeover;
 
  /* 1: allocated, serial number is valid. 0: not allocated */
  u32 vf_alloc;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index f93106e69d80..9400839cfb27 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -66,10 +66,6 @@ static int debug = -1;
 module_param(debug, int, S_IRUGO);
 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
 
-static bool transparent_vf __ro_after_init;
-module_param(transparent_vf, bool, S_IRUGO);
-MODULE_PARM_DESC(transparent_vf, "Transparent failover of SR-IOV devices");
-
 static void netvsc_set_multicast_list(struct net_device *net)
 {
  struct net_device_context *net_device_ctx = netdev_priv(net);
@@ -81,10 +77,8 @@ static void netvsc_set_multicast_list(struct net_device *net)
 static int netvsc_open(struct net_device *net)
 {
  struct netvsc_device *nvdev = net_device_to_netvsc_device(net);
- struct net_device_context *ndev_ctx = netdev_priv(net);
- struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
  struct rndis_device *rdev;
- int ret;
+ int ret = 0;
 
  netif_carrier_off(net);
 
@@ -101,31 +95,19 @@ static int netvsc_open(struct net_device *net)
  if (!rdev->link_state)
  netif_carrier_on(net);
 
- if (transparent_vf && vf_netdev) {
- ret = dev_open(vf_netdev);
- if (ret)
- netdev_warn(net,
-    "unable to open slave: %s: %d\n",
-    vf_netdev->name, ret);
- }
- return 0;
+ return ret;
 }
 
 static int netvsc_close(struct net_device *net)
 {
  struct net_device_context *net_device_ctx = netdev_priv(net);
  struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
- struct net_device *vf_netdev
- = rtnl_dereference(net_device_ctx->vf_netdev);
+ int ret;
  u32 aread, i, msec = 10, retry = 0, retry_max = 20;
  struct vmbus_channel *chn;
- int ret;
 
  netif_tx_disable(net);
 
- if (transparent_vf && vf_netdev)
- dev_close(vf_netdev);
-
  ret = rndis_filter_close(nvdev);
  if (ret != 0) {
  netdev_err(net, "unable to close device (ret %d).\n", ret);
@@ -371,15 +353,6 @@ static u32 net_checksum_info(struct sk_buff *skb)
  return TRANSPORT_INFO_NOT_IP;
 }
 
-static int netvsc_vf_xmit(struct net_device *net, struct net_device *vf_netdev,
-  struct sk_buff *skb)
-{
- skb->dev = vf_netdev;
- /* TODO stats */
-
- return dev_queue_xmit(skb);
-}
-
 static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 {
  struct net_device_context *net_device_ctx = netdev_priv(net);
@@ -394,16 +367,6 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
  struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
  struct hv_page_buffer *pb = page_buf;
 
- /* already called with rcu_read_lock */
- if (transparent_vf) {
- struct net_device *vf_netdev;
-
- /* if VF is present and up then redirect packets */
- vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
- if (vf_netdev && netif_running(vf_netdev))
- return netvsc_vf_xmit(net, vf_netdev, skb);
- }
-
  /* We can only transmit MAX_PAGE_BUFFER_COUNT number
  * of pages in a single packet. If skb is scattered around
  * more pages we try linearizing it.
@@ -679,31 +642,29 @@ int netvsc_recv_callback(struct net_device *net,
  struct netvsc_device *net_device;
  u16 q_idx = channel->offermsg.offer.sub_channel_index;
  struct netvsc_channel *nvchan;
- struct net_device *vf_netdev = NULL;
+ struct net_device *vf_netdev;
  struct sk_buff *skb;
  struct netvsc_stats *rx_stats;
 
  if (net->reg_state != NETREG_REGISTERED)
  return NVSP_STAT_FAIL;
 
+ /*
+ * If necessary, inject this packet into the VF interface.
+ * On Hyper-V, multicast and brodcast packets are only delivered
+ * to the synthetic interface (after subjecting these to
+ * policy filters on the host). Deliver these via the VF
+ * interface in the guest.
+ */
  rcu_read_lock();
  net_device = rcu_dereference(net_device_ctx->nvdev);
  if (unlikely(!net_device))
  goto drop;
 
- /* On Hyper-V, multicast and brodcast packets are only delivered
- * to the synthetic interface (after subjecting these to
- * policy filters on the host). If doing transparent_vf mode
- * all packets appear to be received on the synthetic interface;
- * in legacy mode deliver these via the VF interface.
- */
- if (!transparent_vf) {
- vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
- if (vf_netdev && (vf_netdev->flags & IFF_UP))
- net = vf_netdev;
- }
-
  nvchan = &net_device->chan_table[q_idx];
+ vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
+ if (vf_netdev && (vf_netdev->flags & IFF_UP))
+ net = vf_netdev;
 
  /* Allocate a skb - TODO direct I/O to pages? */
  skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
@@ -1420,101 +1381,6 @@ static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
  return NULL;
 }
 
-/* Called when VF is injecting data into network stack.
- * Change the associated network device from VF to netvsc.
- * note: already called with rcu_read_lock
- */
-static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
-{
- struct sk_buff *skb = *pskb;
- struct net_device *ndev = rcu_dereference(skb->dev->rx_handler_data);
-
- skb->dev = ndev;
-
- /* TODO: stats */
- return RX_HANDLER_ANOTHER;
-}
-
-static int netvsc_vf_join(struct net_device *vf_netdev,
-  struct net_device *ndev)
-{
- struct net_device_context *ndev_ctx = netdev_priv(ndev);
- int ret;
-
- ret = netdev_rx_handler_register(vf_netdev,
- netvsc_vf_handle_frame, ndev);
- if (ret != 0) {
- netdev_err(vf_netdev,
-   "transparent_vf rx possible device (err = %d)\n",
-   ret);
- goto rx_handler_failed;
- }
-
- ret = netdev_upper_dev_link(vf_netdev, ndev);
- if (ret != 0) {
- netdev_err(vf_netdev,
-   "transparent_vf can not set master device %s (err =%d)\n",
-   ndev->name, ret);
- goto upper_link_failed;
- }
-
- /* set slave flag before open to prevent IPv6 addrconf */
- vf_netdev->flags |= IFF_SLAVE;
-
- /* Avoid overhead of qdisc in slave */
- vf_netdev->priv_flags |= IFF_NO_QUEUE;
-
- schedule_work(&ndev_ctx->vf_takeover);
-
- netdev_info(vf_netdev, "joined to %s\n", ndev->name);
- return 0;
-
-upper_link_failed:
- netdev_rx_handler_unregister(vf_netdev);
-rx_handler_failed:
- return ret;
-}
-
-static void __netvsc_vf_setup(struct net_device *ndev,
-      struct net_device *vf_netdev)
-{
- int ret;
-
- call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
-
- /* Align MTU of VF with master */
- ret = dev_set_mtu(vf_netdev, ndev->mtu);
- if (ret)
- netdev_warn(vf_netdev,
-    "unable to change mtu to %u\n", ndev->mtu);
-
- if (netif_running(ndev)) {
- ret = dev_open(vf_netdev);
- if (ret)
- netdev_warn(vf_netdev,
-    "unable to open: %d\n", ret);
- }
-}
-
-/* Setup VF as slave of the synthetic device.
- * Runs in workqueue to avoid recursion in netlink callbacks.
- */
-static void netvsc_vf_setup(struct work_struct *w)
-{
- struct net_device_context *ndev_ctx
- = container_of(w, struct net_device_context, vf_takeover);
- struct hv_device *device_obj = ndev_ctx->device_ctx;
- struct net_device *ndev = hv_get_drvdata(device_obj);
- struct net_device *vf_netdev;
-
- rtnl_lock();
- vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
- if (vf_netdev)
- __netvsc_vf_setup(ndev, vf_netdev);
-
- rtnl_unlock();
-}
-
 static int netvsc_register_vf(struct net_device *vf_netdev)
 {
  struct net_device *ndev;
@@ -1538,13 +1404,10 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
  if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
  return NOTIFY_DONE;
 
- if (transparent_vf &&
-    netvsc_vf_join(vf_netdev, ndev) != 0)
- return NOTIFY_DONE;
-
  netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
-
- /* Prevent this module from being unloaded while VF is registered */
+ /*
+ * Take a reference on the module.
+ */
  try_module_get(THIS_MODULE);
 
  dev_hold(vf_netdev);
@@ -1578,12 +1441,7 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
  netvsc_switch_datapath(ndev, true);
  netdev_info(ndev, "Data path switched to VF: %s\n", vf_netdev->name);
 
- /* If not doing transparent active-backup
- * then drop carrier of the netvsc device so that bonding
- * does switchover.
- */
- if (!transparent_vf)
- netif_carrier_off(ndev);
+ netif_carrier_off(ndev);
 
  /* Now notify peers through VF device. */
  call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);
@@ -1608,9 +1466,7 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
  netvsc_switch_datapath(ndev, false);
  netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
  rndis_filter_close(netvsc_dev);
-
- if (!transparent_vf)
- netif_carrier_on(ndev);
+ netif_carrier_on(ndev);
 
  /* Now notify peers through netvsc device. */
  call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
@@ -1629,8 +1485,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 
  net_device_ctx = netdev_priv(ndev);
 
- cancel_work_sync(&net_device_ctx->vf_takeover);
-
  netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 
  RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
@@ -1670,7 +1524,6 @@ static int netvsc_probe(struct hv_device *dev,
 
  spin_lock_init(&net_device_ctx->lock);
  INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
- INIT_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
 
  net->netdev_ops = &device_ops;
  net->ethtool_ops = &ethtool_ops;
--
2.7.4


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[azure][PATCH v1 3/9] Revert "UBUNTU: SAUCE: netvsc: optimize calculation of number of slots"

Marcelo Henrique Cerri
In reply to this post by Marcelo Henrique Cerri
BugLink: http://bugs.launchpad.net/bugs/1708469

This reverts commit a0c7595bce2091faff21222bdcfb4e7ebd3a6b5f.

The equivalent commit was also reverted in net-next.

Signed-off-by: Marcelo Henrique Cerri <[hidden email]>
---
 drivers/net/hyperv/netvsc_drv.c | 43 +++++++++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 9400839cfb27..1825929db52a 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -322,14 +322,34 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
  return slots_used;
 }
 
-/* Estimate number of page buffers neede to transmit
- * Need at most 2 for RNDIS header plus skb body and fragments.
- */
-static unsigned int netvsc_get_slots(const struct sk_buff *skb)
+static int count_skb_frag_slots(struct sk_buff *skb)
+{
+ int i, frags = skb_shinfo(skb)->nr_frags;
+ int pages = 0;
+
+ for (i = 0; i < frags; i++) {
+ skb_frag_t *frag = skb_shinfo(skb)->frags + i;
+ unsigned long size = skb_frag_size(frag);
+ unsigned long offset = frag->page_offset;
+
+ /* Skip unused frames from start of page */
+ offset &= ~PAGE_MASK;
+ pages += PFN_UP(offset + size);
+ }
+ return pages;
+}
+
+static int netvsc_get_slots(struct sk_buff *skb)
 {
- return PFN_UP(offset_in_page(skb->data) + skb_headlen(skb))
- + skb_shinfo(skb)->nr_frags
- + 2;
+ char *data = skb->data;
+ unsigned int offset = offset_in_page(data);
+ unsigned int len = skb_headlen(skb);
+ int slots;
+ int frag_slots;
+
+ slots = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+ frag_slots = count_skb_frag_slots(skb);
+ return slots + frag_slots;
 }
 
 static u32 net_checksum_info(struct sk_buff *skb)
@@ -367,18 +387,21 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
  struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
  struct hv_page_buffer *pb = page_buf;
 
- /* We can only transmit MAX_PAGE_BUFFER_COUNT number
+ /* We will atmost need two pages to describe the rndis
+ * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
  * of pages in a single packet. If skb is scattered around
  * more pages we try linearizing it.
  */
- num_data_pgs = netvsc_get_slots(skb);
+
+ num_data_pgs = netvsc_get_slots(skb) + 2;
+
  if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) {
  ++net_device_ctx->eth_stats.tx_scattered;
 
  if (skb_linearize(skb))
  goto no_memory;
 
- num_data_pgs = netvsc_get_slots(skb);
+ num_data_pgs = netvsc_get_slots(skb) + 2;
  if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
  ++net_device_ctx->eth_stats.tx_too_big;
  goto drop;
--
2.7.4


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[azure][PATCH v1 4/9] hv_netvsc: Fix the carrier state error when data path is off

Marcelo Henrique Cerri
In reply to this post by Marcelo Henrique Cerri
From: Haiyang Zhang <[hidden email]>

BugLink: http://bugs.launchpad.net/bugs/1708469

When the VF NIC is opened, the synthetic NIC's carrier state is set to
off. This tells the host to transition the data path to the VF device. But
if a startup script or user manipulates the admin state of the netvsc
device directly, for example:
        # ifconfig eth0 down
        # ifconfig eth0 up
Then the carrier state of the synthetic NIC would be on, even though the
data path was still over the VF NIC. This patch sets the carrier state
of synthetic NIC with consideration of the related VF state.

Signed-off-by: Haiyang Zhang <[hidden email]>
Reviewed-by: Stephen Hemminger <[hidden email]>
Signed-off-by: David S. Miller <[hidden email]>
(cherry picked from commit 53fa1a6f33520f01f9dbee48369074b34d77616b)
Signed-off-by: Marcelo Henrique Cerri <[hidden email]>
---
 drivers/net/hyperv/hyperv_net.h | 2 ++
 drivers/net/hyperv/netvsc.c     | 2 ++
 drivers/net/hyperv/netvsc_drv.c | 8 +++++---
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 880572765b5d..27b71b701e2d 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -722,6 +722,8 @@ struct net_device_context {
  u32 vf_alloc;
  /* Serial number of the VF to team with */
  u32 vf_serial;
+
+ bool datapath; /* 0 - synthetic, 1 - VF nic */
 };
 
 /* Per channel data */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 6a2f295d94df..de3aa1cb2611 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -60,6 +60,8 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
        sizeof(struct nvsp_message),
        (unsigned long)init_pkt,
        VM_PKT_DATA_INBAND, 0);
+
+ net_device_ctx->datapath = vf;
 }
 
 static struct netvsc_device *alloc_net_device(void)
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 1825929db52a..c506033c0c1b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -76,7 +76,8 @@ static void netvsc_set_multicast_list(struct net_device *net)
 
 static int netvsc_open(struct net_device *net)
 {
- struct netvsc_device *nvdev = net_device_to_netvsc_device(net);
+ struct net_device_context *ndev_ctx = netdev_priv(net);
+ struct netvsc_device *nvdev = ndev_ctx->nvdev;
  struct rndis_device *rdev;
  int ret = 0;
 
@@ -92,7 +93,7 @@ static int netvsc_open(struct net_device *net)
  netif_tx_wake_all_queues(net);
 
  rdev = nvdev->extension;
- if (!rdev->link_state)
+ if (!rdev->link_state && !ndev_ctx->datapath)
  netif_carrier_on(net);
 
  return ret;
@@ -1317,7 +1318,8 @@ static void netvsc_link_change(struct work_struct *w)
  case RNDIS_STATUS_MEDIA_CONNECT:
  if (rdev->link_state) {
  rdev->link_state = false;
- netif_carrier_on(net);
+ if (!ndev_ctx->datapath)
+ netif_carrier_on(net);
  netif_tx_wake_all_queues(net);
  } else {
  notify = true;
--
2.7.4


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[azure][PATCH v1 5/9] UBUNTU: SAUCE: netvsc: add some rtnl_dereference annotations

Marcelo Henrique Cerri
In reply to this post by Marcelo Henrique Cerri
From: stephen hemminger <[hidden email]>

BugLink: http://bugs.launchpad.net/bugs/1708469

In a couple of places RTNL is held, and the netvsc_device pointer
is acquired without annotation.

Signed-off-by: Stephen Hemminger <[hidden email]>
Signed-off-by: David S. Miller <[hidden email]>
(cherry picked from net-next commit 79e8cbe7a789a0863cc6cd874872b4dd63ec5947)
Signed-off-by: Marcelo Henrique Cerri <[hidden email]>
---
 drivers/net/hyperv/netvsc.c     | 5 +++--
 drivers/net/hyperv/netvsc_drv.c | 7 ++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index de3aa1cb2611..ca7d07f8e4f1 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -44,7 +44,7 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
 {
  struct net_device_context *net_device_ctx = netdev_priv(ndev);
  struct hv_device *dev = net_device_ctx->device_ctx;
- struct netvsc_device *nv_dev = net_device_ctx->nvdev;
+ struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
  struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
 
  memset(init_pkt, 0, sizeof(struct nvsp_message));
@@ -569,7 +569,8 @@ void netvsc_device_remove(struct hv_device *device)
 {
  struct net_device *ndev = hv_get_drvdata(device);
  struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct netvsc_device *net_device = net_device_ctx->nvdev;
+ struct netvsc_device *net_device
+ = rtnl_dereference(net_device_ctx->nvdev);
  int i;
 
  netvsc_disconnect_vsp(device);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index c506033c0c1b..0c79a73bf37a 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -77,7 +77,7 @@ static void netvsc_set_multicast_list(struct net_device *net)
 static int netvsc_open(struct net_device *net)
 {
  struct net_device_context *ndev_ctx = netdev_priv(net);
- struct netvsc_device *nvdev = ndev_ctx->nvdev;
+ struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev);
  struct rndis_device *rdev;
  int ret = 0;
 
@@ -1396,7 +1396,7 @@ static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
  continue; /* not a netvsc device */
 
  net_device_ctx = netdev_priv(dev);
- if (net_device_ctx->nvdev == NULL)
+ if (!rtnl_dereference(net_device_ctx->nvdev))
  continue; /* device is removed */
 
  if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev)
@@ -1625,7 +1625,8 @@ static int netvsc_remove(struct hv_device *dev)
  * removed. Also blocks mtu and channel changes.
  */
  rtnl_lock();
- rndis_filter_device_remove(dev, ndev_ctx->nvdev);
+ rndis_filter_device_remove(dev,
+   rtnl_dereference(ndev_ctx->nvdev));
  rtnl_unlock();
 
  unregister_netdev(net);
--
2.7.4


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[azure][PATCH v1 6/9] UBUNTU: SAUCE: netvsc: include rtnetlink.h

Marcelo Henrique Cerri
In reply to this post by Marcelo Henrique Cerri
From: stephen hemminger <[hidden email]>

BugLink: http://bugs.launchpad.net/bugs/1708469

Since these files use rtnl_dereference, make sure to include rtnetlink.h

Signed-off-by: Stephen Hemminger <[hidden email]>
Signed-off-by: David S. Miller <[hidden email]>
(cherry picked from net-next commit 27f5aa92ccafbe1bbc695307e3dee41a0e924c28)
Signed-off-by: Marcelo Henrique Cerri <[hidden email]>
---
 drivers/net/hyperv/netvsc_drv.c   | 2 ++
 drivers/net/hyperv/rndis_filter.c | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 0c79a73bf37a..4fb70af3d908 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -33,6 +33,8 @@
 #include <linux/if_vlan.h>
 #include <linux/in.h>
 #include <linux/slab.h>
+#include <linux/rtnetlink.h>
+
 #include <net/arp.h>
 #include <net/route.h>
 #include <net/sock.h>
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index e2ebd43967b2..8fc13bbb435f 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -28,6 +28,7 @@
 #include <linux/if_vlan.h>
 #include <linux/nls.h>
 #include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
 
 #include "hyperv_net.h"
 
--
2.7.4


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[azure][PATCH v1 7/9] UBUNTU: SAUCE: netvsc: transparent VF management

Marcelo Henrique Cerri
In reply to this post by Marcelo Henrique Cerri
From: stephen hemminger <[hidden email]>

BugLink: http://bugs.launchpad.net/bugs/1708469

This patch implements transparent fail over from synthetic NIC to
SR-IOV virtual function NIC in Hyper-V environment. It is a better
alternative to using bonding as is done now. Instead, the receive and
transmit fail over is done internally inside the driver.

Using bonding driver has lots of issues because it depends on the
script being run early enough in the boot process and with sufficient
information to make the association. This patch moves all that
functionality into the kernel.

Signed-off-by: Stephen Hemminger <[hidden email]>
Signed-off-by: David S. Miller <[hidden email]>
(cherry picked from net-next commit 0c195567a8f6e82ea5535cd9f1d54a1626dd233e)
Signed-off-by: Marcelo Henrique Cerri <[hidden email]>
---
 drivers/net/hyperv/hyperv_net.h |  12 ++
 drivers/net/hyperv/netvsc_drv.c | 419 +++++++++++++++++++++++++++++++---------
 2 files changed, 342 insertions(+), 89 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 27b71b701e2d..b252aa384fa0 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -684,6 +684,15 @@ struct netvsc_ethtool_stats {
  unsigned long tx_busy;
 };
 
+struct netvsc_vf_pcpu_stats {
+ u64     rx_packets;
+ u64     rx_bytes;
+ u64     tx_packets;
+ u64     tx_bytes;
+ struct u64_stats_sync   syncp;
+ u32 tx_dropped;
+};
+
 struct netvsc_reconfig {
  struct list_head list;
  u32 event;
@@ -717,6 +726,9 @@ struct net_device_context {
 
  /* State to manage the associated VF interface. */
  struct net_device __rcu *vf_netdev;
+ struct netvsc_vf_pcpu_stats __percpu *vf_stats;
+ struct work_struct vf_takeover;
+ struct work_struct vf_notify;
 
  /* 1: allocated, serial number is valid. 0: not allocated */
  u32 vf_alloc;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 4fb70af3d908..1678428865d5 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -34,6 +34,7 @@
 #include <linux/in.h>
 #include <linux/slab.h>
 #include <linux/rtnetlink.h>
+#include <linux/netpoll.h>
 
 #include <net/arp.h>
 #include <net/route.h>
@@ -79,6 +80,7 @@ static void netvsc_set_multicast_list(struct net_device *net)
 static int netvsc_open(struct net_device *net)
 {
  struct net_device_context *ndev_ctx = netdev_priv(net);
+ struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
  struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev);
  struct rndis_device *rdev;
  int ret = 0;
@@ -95,15 +97,29 @@ static int netvsc_open(struct net_device *net)
  netif_tx_wake_all_queues(net);
 
  rdev = nvdev->extension;
- if (!rdev->link_state && !ndev_ctx->datapath)
+
+ if (!rdev->link_state)
  netif_carrier_on(net);
 
- return ret;
+ if (vf_netdev) {
+ /* Setting synthetic device up transparently sets
+ * slave as up. If open fails, then slave will
+ * still be offline (and not used).
+ */
+ ret = dev_open(vf_netdev);
+ if (ret)
+ netdev_warn(net,
+    "unable to open slave: %s: %d\n",
+    vf_netdev->name, ret);
+ }
+ return 0;
 }
 
 static int netvsc_close(struct net_device *net)
 {
  struct net_device_context *net_device_ctx = netdev_priv(net);
+ struct net_device *vf_netdev
+ = rtnl_dereference(net_device_ctx->vf_netdev);
  struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
  int ret;
  u32 aread, i, msec = 10, retry = 0, retry_max = 20;
@@ -149,6 +165,9 @@ static int netvsc_close(struct net_device *net)
  ret = -ETIMEDOUT;
  }
 
+ if (vf_netdev)
+ dev_close(vf_netdev);
+
  return ret;
 }
 
@@ -232,13 +251,11 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev,
  *
  * TODO support XPS - but get_xps_queue not exported
  */
-static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
- void *accel_priv, select_queue_fallback_t fallback)
+static u16 netvsc_pick_tx(struct net_device *ndev, struct sk_buff *skb)
 {
- unsigned int num_tx_queues = ndev->real_num_tx_queues;
  int q_idx = sk_tx_queue_get(skb->sk);
 
- if (q_idx < 0 || skb->ooo_okay) {
+ if (q_idx < 0 || skb->ooo_okay || q_idx >= ndev->real_num_tx_queues) {
  /* If forwarding a packet, we use the recorded queue when
  * available for better cache locality.
  */
@@ -248,12 +265,33 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
  q_idx = netvsc_get_tx_queue(ndev, skb, q_idx);
  }
 
- while (unlikely(q_idx >= num_tx_queues))
- q_idx -= num_tx_queues;
-
  return q_idx;
 }
 
+static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
+       void *accel_priv,
+       select_queue_fallback_t fallback)
+{
+ struct net_device_context *ndc = netdev_priv(ndev);
+ struct net_device *vf_netdev;
+ u16 txq;
+
+ rcu_read_lock();
+ vf_netdev = rcu_dereference(ndc->vf_netdev);
+ if (vf_netdev) {
+ txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
+ qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
+ } else {
+ txq = netvsc_pick_tx(ndev, skb);
+ }
+ rcu_read_unlock();
+
+ while (unlikely(txq >= ndev->real_num_tx_queues))
+ txq -= ndev->real_num_tx_queues;
+
+ return txq;
+}
+
 static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
  struct hv_page_buffer *pb)
 {
@@ -376,6 +414,33 @@ static u32 net_checksum_info(struct sk_buff *skb)
  return TRANSPORT_INFO_NOT_IP;
 }
 
+/* Send skb on the slave VF device. */
+static int netvsc_vf_xmit(struct net_device *net, struct net_device *vf_netdev,
+  struct sk_buff *skb)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(net);
+ unsigned int len = skb->len;
+ int rc;
+
+ skb->dev = vf_netdev;
+ skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping;
+
+ rc = dev_queue_xmit(skb);
+ if (likely(rc == NET_XMIT_SUCCESS || rc == NET_XMIT_CN)) {
+ struct netvsc_vf_pcpu_stats *pcpu_stats
+ = this_cpu_ptr(ndev_ctx->vf_stats);
+
+ u64_stats_update_begin(&pcpu_stats->syncp);
+ pcpu_stats->tx_packets++;
+ pcpu_stats->tx_bytes += len;
+ u64_stats_update_end(&pcpu_stats->syncp);
+ } else {
+ this_cpu_inc(ndev_ctx->vf_stats->tx_dropped);
+ }
+
+ return rc;
+}
+
 static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 {
  struct net_device_context *net_device_ctx = netdev_priv(net);
@@ -384,12 +449,21 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
  unsigned int num_data_pgs;
  struct rndis_message *rndis_msg;
  struct rndis_packet *rndis_pkt;
+ struct net_device *vf_netdev;
  u32 rndis_msg_size;
  struct rndis_per_packet_info *ppi;
  u32 hash;
  struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
  struct hv_page_buffer *pb = page_buf;
 
+ /* if VF is present and up then redirect packets
+ * already called with rcu_read_lock_bh
+ */
+ vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev);
+ if (vf_netdev && netif_running(vf_netdev) &&
+    !netpoll_tx_running(net))
+ return netvsc_vf_xmit(net, vf_netdev, skb);
+
  /* We will atmost need two pages to describe the rndis
  * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
  * of pages in a single packet. If skb is scattered around
@@ -668,29 +742,18 @@ int netvsc_recv_callback(struct net_device *net,
  struct netvsc_device *net_device;
  u16 q_idx = channel->offermsg.offer.sub_channel_index;
  struct netvsc_channel *nvchan;
- struct net_device *vf_netdev;
  struct sk_buff *skb;
  struct netvsc_stats *rx_stats;
 
  if (net->reg_state != NETREG_REGISTERED)
  return NVSP_STAT_FAIL;
 
- /*
- * If necessary, inject this packet into the VF interface.
- * On Hyper-V, multicast and brodcast packets are only delivered
- * to the synthetic interface (after subjecting these to
- * policy filters on the host). Deliver these via the VF
- * interface in the guest.
- */
  rcu_read_lock();
  net_device = rcu_dereference(net_device_ctx->nvdev);
  if (unlikely(!net_device))
  goto drop;
 
  nvchan = &net_device->chan_table[q_idx];
- vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
- if (vf_netdev && (vf_netdev->flags & IFF_UP))
- net = vf_netdev;
 
  /* Allocate a skb - TODO direct I/O to pages? */
  skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
@@ -702,8 +765,7 @@ int netvsc_recv_callback(struct net_device *net,
  return NVSP_STAT_FAIL;
  }
 
- if (net != vf_netdev)
- skb_record_rx_queue(skb, q_idx);
+ skb_record_rx_queue(skb, q_idx);
 
  /*
  * Even if injecting the packet, record the statistics
@@ -879,6 +941,7 @@ static int netvsc_set_link_ksettings(struct net_device *dev,
 static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 {
  struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
  struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
  struct hv_device *hdev = ndevctx->device_ctx;
  int orig_mtu = ndev->mtu;
@@ -889,6 +952,13 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
  if (!nvdev || nvdev->destroy)
  return -ENODEV;
 
+ /* Change MTU of underlying VF netdev first. */
+ if (vf_netdev) {
+ ret = dev_set_mtu(vf_netdev, mtu);
+ if (ret)
+ return ret;
+ }
+
  netif_device_detach(ndev);
  was_opened = rndis_filter_opened(nvdev);
  if (was_opened)
@@ -912,6 +982,9 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
  /* Attempt rollback to original MTU */
  ndev->mtu = orig_mtu;
  rndis_filter_device_add(hdev, &device_info);
+
+ if (vf_netdev)
+ dev_set_mtu(vf_netdev, orig_mtu);
  }
 
  if (was_opened)
@@ -925,16 +998,56 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
  return ret;
 }
 
+static void netvsc_get_vf_stats(struct net_device *net,
+ struct netvsc_vf_pcpu_stats *tot)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(net);
+ int i;
+
+ memset(tot, 0, sizeof(*tot));
+
+ for_each_possible_cpu(i) {
+ const struct netvsc_vf_pcpu_stats *stats
+ = per_cpu_ptr(ndev_ctx->vf_stats, i);
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&stats->syncp);
+ rx_packets = stats->rx_packets;
+ tx_packets = stats->tx_packets;
+ rx_bytes = stats->rx_bytes;
+ tx_bytes = stats->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+ tot->rx_packets += rx_packets;
+ tot->tx_packets += tx_packets;
+ tot->rx_bytes   += rx_bytes;
+ tot->tx_bytes   += tx_bytes;
+ tot->tx_dropped += stats->tx_dropped;
+ }
+}
+
 static void netvsc_get_stats64(struct net_device *net,
        struct rtnl_link_stats64 *t)
 {
  struct net_device_context *ndev_ctx = netdev_priv(net);
  struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
- int i;
+ struct netvsc_vf_pcpu_stats vf_tot;
+ int i;
 
  if (!nvdev)
  return;
 
+ netdev_stats_to_stats64(t, &net->stats);
+
+ netvsc_get_vf_stats(net, &vf_tot);
+ t->rx_packets += vf_tot.rx_packets;
+ t->tx_packets += vf_tot.tx_packets;
+ t->rx_bytes   += vf_tot.rx_bytes;
+ t->tx_bytes   += vf_tot.tx_bytes;
+ t->tx_dropped += vf_tot.tx_dropped;
+
  for (i = 0; i < nvdev->num_chn; i++) {
  const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
  const struct netvsc_stats *stats;
@@ -963,12 +1076,6 @@ static void netvsc_get_stats64(struct net_device *net,
  t->rx_packets += packets;
  t->multicast += multicast;
  }
-
- t->tx_dropped = net->stats.tx_dropped;
- t->tx_errors = net->stats.tx_errors;
-
- t->rx_dropped = net->stats.rx_dropped;
- t->rx_errors = net->stats.rx_errors;
 }
 
 static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
@@ -1004,9 +1111,16 @@ static const struct {
  { "tx_no_space",  offsetof(struct netvsc_ethtool_stats, tx_no_space) },
  { "tx_too_big",  offsetof(struct netvsc_ethtool_stats, tx_too_big) },
  { "tx_busy",  offsetof(struct netvsc_ethtool_stats, tx_busy) },
+}, vf_stats[] = {
+ { "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
+ { "vf_rx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
+ { "vf_tx_packets", offsetof(struct netvsc_vf_pcpu_stats, tx_packets) },
+ { "vf_tx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, tx_bytes) },
+ { "vf_tx_dropped", offsetof(struct netvsc_vf_pcpu_stats, tx_dropped) },
 };
 
 #define NETVSC_GLOBAL_STATS_LEN ARRAY_SIZE(netvsc_stats)
+#define NETVSC_VF_STATS_LEN ARRAY_SIZE(vf_stats)
 
 /* 4 statistics per queue (rx/tx packets/bytes) */
 #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
@@ -1021,7 +1135,9 @@ static int netvsc_get_sset_count(struct net_device *dev, int string_set)
 
  switch (string_set) {
  case ETH_SS_STATS:
- return NETVSC_GLOBAL_STATS_LEN + NETVSC_QUEUE_STATS_LEN(nvdev);
+ return NETVSC_GLOBAL_STATS_LEN
+ + NETVSC_VF_STATS_LEN
+ + NETVSC_QUEUE_STATS_LEN(nvdev);
  default:
  return -EINVAL;
  }
@@ -1034,6 +1150,7 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
  struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
  const void *nds = &ndc->eth_stats;
  const struct netvsc_stats *qstats;
+ struct netvsc_vf_pcpu_stats sum;
  unsigned int start;
  u64 packets, bytes;
  int i, j;
@@ -1044,6 +1161,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
  for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++)
  data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset);
 
+ netvsc_get_vf_stats(dev, &sum);
+ for (j = 0; j < NETVSC_VF_STATS_LEN; j++)
+ data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);
+
  for (j = 0; j < nvdev->num_chn; j++) {
  qstats = &nvdev->chan_table[j].tx_stats;
 
@@ -1078,11 +1199,16 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 
  switch (stringset) {
  case ETH_SS_STATS:
- for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
- memcpy(p + i * ETH_GSTRING_LEN,
-       netvsc_stats[i].name, ETH_GSTRING_LEN);
+ for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) {
+ memcpy(p, netvsc_stats[i].name, ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vf_stats); i++) {
+ memcpy(p, vf_stats[i].name, ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
 
- p += i * ETH_GSTRING_LEN;
  for (i = 0; i < nvdev->num_chn; i++) {
  sprintf(p, "tx_queue_%u_packets", i);
  p += ETH_GSTRING_LEN;
@@ -1320,8 +1446,7 @@ static void netvsc_link_change(struct work_struct *w)
  case RNDIS_STATUS_MEDIA_CONNECT:
  if (rdev->link_state) {
  rdev->link_state = false;
- if (!ndev_ctx->datapath)
- netif_carrier_on(net);
+ netif_carrier_on(net);
  netif_tx_wake_all_queues(net);
  } else {
  notify = true;
@@ -1408,6 +1533,104 @@ static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
  return NULL;
 }
 
+/* Called when VF is injecting data into network stack.
+ * Change the associated network device from VF to netvsc.
+ * note: already called with rcu_read_lock
+ */
+static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
+{
+ struct sk_buff *skb = *pskb;
+ struct net_device *ndev = rcu_dereference(skb->dev->rx_handler_data);
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ struct netvsc_vf_pcpu_stats *pcpu_stats
+ = this_cpu_ptr(ndev_ctx->vf_stats);
+
+ skb->dev = ndev;
+
+ u64_stats_update_begin(&pcpu_stats->syncp);
+ pcpu_stats->rx_packets++;
+ pcpu_stats->rx_bytes += skb->len;
+ u64_stats_update_end(&pcpu_stats->syncp);
+
+ return RX_HANDLER_ANOTHER;
+}
+
+static int netvsc_vf_join(struct net_device *vf_netdev,
+  struct net_device *ndev)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ int ret;
+
+ ret = netdev_rx_handler_register(vf_netdev,
+ netvsc_vf_handle_frame, ndev);
+ if (ret != 0) {
+ netdev_err(vf_netdev,
+   "can not register netvsc VF receive handler (err = %d)\n",
+   ret);
+ goto rx_handler_failed;
+ }
+
+ ret = netdev_upper_dev_link(vf_netdev, ndev);
+ if (ret != 0) {
+ netdev_err(vf_netdev,
+   "can not set master device %s (err = %d)\n",
+   ndev->name, ret);
+ goto upper_link_failed;
+ }
+
+ /* set slave flag before open to prevent IPv6 addrconf */
+ vf_netdev->flags |= IFF_SLAVE;
+
+ schedule_work(&ndev_ctx->vf_takeover);
+
+ netdev_info(vf_netdev, "joined to %s\n", ndev->name);
+ return 0;
+
+upper_link_failed:
+ netdev_rx_handler_unregister(vf_netdev);
+rx_handler_failed:
+ return ret;
+}
+
+static void __netvsc_vf_setup(struct net_device *ndev,
+      struct net_device *vf_netdev)
+{
+ int ret;
+
+ call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
+
+ /* Align MTU of VF with master */
+ ret = dev_set_mtu(vf_netdev, ndev->mtu);
+ if (ret)
+ netdev_warn(vf_netdev,
+    "unable to change mtu to %u\n", ndev->mtu);
+
+ if (netif_running(ndev)) {
+ ret = dev_open(vf_netdev);
+ if (ret)
+ netdev_warn(vf_netdev,
+    "unable to open: %d\n", ret);
+ }
+}
+
+/* Setup VF as slave of the synthetic device.
+ * Runs in workqueue to avoid recursion in netlink callbacks.
+ */
+static void netvsc_vf_setup(struct work_struct *w)
+{
+ struct net_device_context *ndev_ctx
+ = container_of(w, struct net_device_context, vf_takeover);
+ struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx);
+ struct net_device *vf_netdev;
+
+ rtnl_lock();
+ vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+ if (vf_netdev)
+ __netvsc_vf_setup(ndev, vf_netdev);
+
+ rtnl_unlock();
+}
+
 static int netvsc_register_vf(struct net_device *vf_netdev)
 {
  struct net_device *ndev;
@@ -1431,10 +1654,12 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
  if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
  return NOTIFY_DONE;
 
+ if (netvsc_vf_join(vf_netdev, ndev) != 0)
+ return NOTIFY_DONE;
+
  netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
- /*
- * Take a reference on the module.
- */
+
+ /* Prevent this module from being unloaded while VF is registered */
  try_module_get(THIS_MODULE);
 
  dev_hold(vf_netdev);
@@ -1442,61 +1667,59 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
  return NOTIFY_OK;
 }
 
-static int netvsc_vf_up(struct net_device *vf_netdev)
+/* Change datapath */
+static void netvsc_vf_update(struct work_struct *w)
 {
- struct net_device *ndev;
+ struct net_device_context *ndev_ctx
+ = container_of(w, struct net_device_context, vf_notify);
+ struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx);
  struct netvsc_device *netvsc_dev;
- struct net_device_context *net_device_ctx;
-
- ndev = get_netvsc_byref(vf_netdev);
- if (!ndev)
- return NOTIFY_DONE;
-
- net_device_ctx = netdev_priv(ndev);
- netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
-
- netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-
- /*
- * Open the device before switching data path.
- */
- rndis_filter_open(netvsc_dev);
-
- /*
- * notify the host to switch the data path.
- */
- netvsc_switch_datapath(ndev, true);
- netdev_info(ndev, "Data path switched to VF: %s\n", vf_netdev->name);
-
- netif_carrier_off(ndev);
+ struct net_device *vf_netdev;
+ bool vf_is_up;
 
- /* Now notify peers through VF device. */
- call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);
+ rtnl_lock();
+ vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+ if (!vf_netdev)
+ goto unlock;
+
+ netvsc_dev = rtnl_dereference(ndev_ctx->nvdev);
+ if (!netvsc_dev)
+ goto unlock;
+
+ vf_is_up = netif_running(vf_netdev);
+ if (vf_is_up != ndev_ctx->datapath) {
+ if (vf_is_up) {
+ netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
+ rndis_filter_open(netvsc_dev);
+ netvsc_switch_datapath(ndev, true);
+ netdev_info(ndev, "Data path switched to VF: %s\n",
+    vf_netdev->name);
+ } else {
+ netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
+ netvsc_switch_datapath(ndev, false);
+ rndis_filter_close(netvsc_dev);
+ netdev_info(ndev, "Data path switched from VF: %s\n",
+    vf_netdev->name);
+ }
 
- return NOTIFY_OK;
+ /* Now notify peers through netvsc device. */
+ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
+ }
+unlock:
+ rtnl_unlock();
 }
 
-static int netvsc_vf_down(struct net_device *vf_netdev)
+static int netvsc_vf_notify(struct net_device *vf_netdev)
 {
- struct net_device *ndev;
- struct netvsc_device *netvsc_dev;
  struct net_device_context *net_device_ctx;
+ struct net_device *ndev;
 
  ndev = get_netvsc_byref(vf_netdev);
  if (!ndev)
  return NOTIFY_DONE;
 
  net_device_ctx = netdev_priv(ndev);
- netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
-
- netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
- netvsc_switch_datapath(ndev, false);
- netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
- rndis_filter_close(netvsc_dev);
- netif_carrier_on(ndev);
-
- /* Now notify peers through netvsc device. */
- call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
+ schedule_work(&net_device_ctx->vf_notify);
 
  return NOTIFY_OK;
 }
@@ -1511,9 +1734,12 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
  return NOTIFY_DONE;
 
  net_device_ctx = netdev_priv(ndev);
+ cancel_work_sync(&net_device_ctx->vf_takeover);
+ cancel_work_sync(&net_device_ctx->vf_notify);
 
  netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 
+ netdev_upper_dev_unlink(vf_netdev, ndev);
  RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
  dev_put(vf_netdev);
  module_put(THIS_MODULE);
@@ -1527,12 +1753,12 @@ static int netvsc_probe(struct hv_device *dev,
  struct net_device_context *net_device_ctx;
  struct netvsc_device_info device_info;
  struct netvsc_device *nvdev;
- int ret;
+ int ret = -ENOMEM;
 
  net = alloc_etherdev_mq(sizeof(struct net_device_context),
  VRSS_CHANNEL_MAX);
  if (!net)
- return -ENOMEM;
+ goto no_net;
 
  netif_carrier_off(net);
 
@@ -1551,6 +1777,13 @@ static int netvsc_probe(struct hv_device *dev,
 
  spin_lock_init(&net_device_ctx->lock);
  INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
+ INIT_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
+ INIT_WORK(&net_device_ctx->vf_notify, netvsc_vf_update);
+
+ net_device_ctx->vf_stats
+ = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats);
+ if (!net_device_ctx->vf_stats)
+ goto no_stats;
 
  net->netdev_ops = &device_ops;
  net->ethtool_ops = &ethtool_ops;
@@ -1570,10 +1803,9 @@ static int netvsc_probe(struct hv_device *dev,
  if (IS_ERR(nvdev)) {
  ret = PTR_ERR(nvdev);
  netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
- free_netdev(net);
- hv_set_drvdata(dev, NULL);
- return ret;
+ goto rndis_failed;
  }
+
  memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
 
  /* hw_features computed in rndis_filter_device_add */
@@ -1597,11 +1829,20 @@ static int netvsc_probe(struct hv_device *dev,
  ret = register_netdev(net);
  if (ret != 0) {
  pr_err("Unable to register netdev.\n");
- rndis_filter_device_remove(dev, nvdev);
- free_netdev(net);
+ goto register_failed;
  }
 
  return ret;
+
+register_failed:
+ rndis_filter_device_remove(dev, nvdev);
+rndis_failed:
+ free_percpu(net_device_ctx->vf_stats);
+no_stats:
+ hv_set_drvdata(dev, NULL);
+ free_netdev(net);
+no_net:
+ return ret;
 }
 
 static int netvsc_remove(struct hv_device *dev)
@@ -1635,6 +1876,7 @@ static int netvsc_remove(struct hv_device *dev)
 
  hv_set_drvdata(dev, NULL);
 
+ free_percpu(ndev_ctx->vf_stats);
  free_netdev(net);
  return 0;
 }
@@ -1689,9 +1931,8 @@ static int netvsc_netdev_event(struct notifier_block *this,
  case NETDEV_UNREGISTER:
  return netvsc_unregister_vf(event_dev);
  case NETDEV_UP:
- return netvsc_vf_up(event_dev);
  case NETDEV_DOWN:
- return netvsc_vf_down(event_dev);
+ return netvsc_vf_notify(event_dev);
  default:
  return NOTIFY_DONE;
  }
--
2.7.4


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[azure][PATCH v1 8/9] UBUNTU: SAUCE: netvsc: add documentation

Marcelo Henrique Cerri
In reply to this post by Marcelo Henrique Cerri
From: stephen hemminger <[hidden email]>

BugLink: http://bugs.launchpad.net/bugs/1708469

Add some background documentation on netvsc device options
and limitations.

Signed-off-by: Stephen Hemminger <[hidden email]>
Signed-off-by: David S. Miller <[hidden email]>
(cherry picked from net-next commit a5050c61036859e6fd7924f25cc6a97e7462039d)
Signed-off-by: Marcelo Henrique Cerri <[hidden email]>
---
 Documentation/networking/netvsc.txt | 63 +++++++++++++++++++++++++++++++++++++
 MAINTAINERS                         |  1 +
 2 files changed, 64 insertions(+)
 create mode 100644 Documentation/networking/netvsc.txt

diff --git a/Documentation/networking/netvsc.txt b/Documentation/networking/netvsc.txt
new file mode 100644
index 000000000000..4ddb4e4b0426
--- /dev/null
+++ b/Documentation/networking/netvsc.txt
@@ -0,0 +1,63 @@
+Hyper-V network driver
+======================
+
+Compatibility
+=============
+
+This driver is compatible with Windows Server 2012 R2, 2016 and
+Windows 10.
+
+Features
+========
+
+  Checksum offload
+  ----------------
+  The netvsc driver supports checksum offload as long as the
+  Hyper-V host version does. Windows Server 2016 and Azure
+  support checksum offload for TCP and UDP for both IPv4 and
+  IPv6. Windows Server 2012 only supports checksum offload for TCP.
+
+  Receive Side Scaling
+  --------------------
+  Hyper-V supports receive side scaling. For TCP, packets are
+  distributed among available queues based on IP address and port
+  number. Current versions of the Hyper-V host only distribute UDP
+  packets based on the IP source and destination address.
+  The port number is not used as part of the hash value for UDP.
+  Fragmented IP packets are not distributed between queues;
+  all fragmented packets arrive on the first channel.
+
+  Generic Receive Offload, aka GRO
+  --------------------------------
+  The driver supports GRO and it is enabled by default. GRO coalesces
+  like packets and significantly reduces CPU usage under heavy Rx
+  load.
+
+  SR-IOV support
+  --------------
+  Hyper-V supports SR-IOV as a hardware acceleration option. If SR-IOV
+  is enabled in both the vSwitch and the guest configuration, then the
+  Virtual Function (VF) device is passed to the guest as a PCI
+  device. In this case, both a synthetic (netvsc) and VF device are
+  visible in the guest OS and both NICs have the same MAC address.
+
+  The VF is enslaved by netvsc device.  The netvsc driver will transparently
+  switch the data path to the VF when it is available and up.
+  Network state (addresses, firewall, etc) should be applied only to the
+  netvsc device; the slave device should not be accessed directly in
+  most cases.  The exceptions are if some special queue discipline or
+  flow direction is desired, these should be applied directly to the
+  VF slave device.
+
+  Receive Buffer
+  --------------
+  Packets are received into a receive area which is created when device
+  is probed. The receive area is broken into MTU sized chunks and each may
+  contain one or more packets. The number of receive sections may be changed
+  via ethtool Rx ring parameters.
+
+  There is a similar send buffer which is used to aggregate packets for sending.
+  The send area is broken into chunks of 6144 bytes; each section may
+  contain one or more packets. The send buffer is an optimization; the driver
+  will use a slower method to handle very large packets or if the send buffer
+  area is exhausted.
diff --git a/MAINTAINERS b/MAINTAINERS
index 8618e6b21458..6f260f64dc05 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6086,6 +6086,7 @@ M: Haiyang Zhang <[hidden email]>
 M: Stephen Hemminger <[hidden email]>
 L: [hidden email]
 S: Maintained
+F: Documentation/networking/netvsc.txt
 F: arch/x86/include/asm/mshyperv.h
 F: arch/x86/include/asm/trace/hyperv.h
 F: arch/x86/include/uapi/asm/hyperv.h
--
2.7.4


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[azure][PATCH v1 9/9] netvsc: remove bonding setup script

Marcelo Henrique Cerri
In reply to this post by Marcelo Henrique Cerri
From: stephen hemminger <[hidden email]>

BugLink: http://bugs.launchpad.net/bugs/1708469

No longer needed, now all managed by transparent VF logic.

Signed-off-by: Stephen Hemminger <[hidden email]>
Signed-off-by: David S. Miller <[hidden email]>
(backported from net-next commit 12aa7469d101e139b3728e540884bc7d72dca70a)
[mhcerri: ignored any missing changes to tools/hv/bondvf.sh and simply
removed the file]
Signed-off-by: Marcelo Henrique Cerri <[hidden email]>
---
 tools/hv/bondvf.sh | 193 -----------------------------------------------------
 1 file changed, 193 deletions(-)
 delete mode 100755 tools/hv/bondvf.sh

diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh
deleted file mode 100755
index 4aa5369ffa4e..000000000000
--- a/tools/hv/bondvf.sh
+++ /dev/null
@@ -1,193 +0,0 @@
-#!/bin/bash
-
-# This example script creates bonding network devices based on synthetic NIC
-# (the virtual network adapter usually provided by Hyper-V) and the matching
-# VF NIC (SRIOV virtual function). So the synthetic NIC and VF NIC can
-# function as one network device, and fail over to the synthetic NIC if VF is
-# down.
-#
-# Usage:
-# - After configured vSwitch and vNIC with SRIOV, start Linux virtual
-#   machine (VM)
-# - Run this scripts on the VM. It will create configuration files in
-#   distro specific directory.
-# - Reboot the VM, so that the bonding config are enabled.
-#
-# The config files are DHCP by default. You may edit them if you need to change
-# to Static IP or change other settings.
-#
-
-sysdir=/sys/class/net
-netvsc_cls={f8615163-df3e-46c5-913f-f2d2f965ed0e}
-bondcnt=0
-
-# Detect Distro
-if [ -f /etc/redhat-release ];
-then
- cfgdir=/etc/sysconfig/network-scripts
- distro=redhat
-elif grep -q 'Ubuntu' /etc/issue
-then
- cfgdir=/etc/network
- distro=ubuntu
-elif grep -q 'SUSE' /etc/issue
-then
- cfgdir=/etc/sysconfig/network
- distro=suse
-else
- echo "Unsupported Distro"
- exit 1
-fi
-
-echo Detected Distro: $distro, or compatible
-
-# Get a list of ethernet names
-list_eth=(`cd $sysdir && ls -d */ | cut -d/ -f1 | grep -v bond`)
-eth_cnt=${#list_eth[@]}
-
-echo List of net devices:
-
-# Get the MAC addresses
-for (( i=0; i < $eth_cnt; i++ ))
-do
- list_mac[$i]=`cat $sysdir/${list_eth[$i]}/address`
- echo ${list_eth[$i]}, ${list_mac[$i]}
-done
-
-# Find NIC with matching MAC
-for (( i=0; i < $eth_cnt-1; i++ ))
-do
- for (( j=i+1; j < $eth_cnt; j++ ))
- do
- if [ "${list_mac[$i]}" = "${list_mac[$j]}" ]
- then
- list_match[$i]=${list_eth[$j]}
- break
- fi
- done
-done
-
-function create_eth_cfg_redhat {
- local fn=$cfgdir/ifcfg-$1
-
- rm -f $fn
- echo DEVICE=$1 >>$fn
- echo TYPE=Ethernet >>$fn
- echo BOOTPROTO=none >>$fn
- echo UUID=`uuidgen` >>$fn
- echo ONBOOT=yes >>$fn
- echo PEERDNS=yes >>$fn
- echo IPV6INIT=yes >>$fn
- echo MASTER=$2 >>$fn
- echo SLAVE=yes >>$fn
-}
-
-function create_eth_cfg_pri_redhat {
- create_eth_cfg_redhat $1 $2
-}
-
-function create_bond_cfg_redhat {
- local fn=$cfgdir/ifcfg-$1
-
- rm -f $fn
- echo DEVICE=$1 >>$fn
- echo TYPE=Bond >>$fn
- echo BOOTPROTO=dhcp >>$fn
- echo UUID=`uuidgen` >>$fn
- echo ONBOOT=yes >>$fn
- echo PEERDNS=yes >>$fn
- echo IPV6INIT=yes >>$fn
- echo BONDING_MASTER=yes >>$fn
- echo BONDING_OPTS=\"mode=active-backup miimon=100 primary=$2\" >>$fn
-}
-
-function create_eth_cfg_ubuntu {
- local fn=$cfgdir/interfaces
-
- echo $'\n'auto $1 >>$fn
- echo iface $1 inet manual >>$fn
- echo bond-master $2 >>$fn
-}
-
-function create_eth_cfg_pri_ubuntu {
- local fn=$cfgdir/interfaces
-
- create_eth_cfg_ubuntu $1 $2
- echo bond-primary $1 >>$fn
-}
-
-function create_bond_cfg_ubuntu {
- local fn=$cfgdir/interfaces
-
- echo $'\n'auto $1 >>$fn
- echo iface $1 inet dhcp >>$fn
- echo bond-mode active-backup >>$fn
- echo bond-miimon 100 >>$fn
- echo bond-slaves none >>$fn
-}
-
-function create_eth_cfg_suse {
-        local fn=$cfgdir/ifcfg-$1
-
-        rm -f $fn
- echo BOOTPROTO=none >>$fn
- echo STARTMODE=auto >>$fn
-}
-
-function create_eth_cfg_pri_suse {
- create_eth_cfg_suse $1
-}
-
-function create_bond_cfg_suse {
- local fn=$cfgdir/ifcfg-$1
-
- rm -f $fn
- echo BOOTPROTO=dhcp >>$fn
- echo STARTMODE=auto >>$fn
- echo BONDING_MASTER=yes >>$fn
- echo BONDING_SLAVE_0=$2 >>$fn
- echo BONDING_SLAVE_1=$3 >>$fn
- echo BONDING_MODULE_OPTS=\'mode=active-backup miimon=100 primary=$2\' >>$fn
-}
-
-function create_bond {
- local bondname=bond$bondcnt
- local primary
- local secondary
-
- local class_id1=`cat $sysdir/$1/device/class_id 2>/dev/null`
- local class_id2=`cat $sysdir/$2/device/class_id 2>/dev/null`
-
- if [ "$class_id1" = "$netvsc_cls" ]
- then
- primary=$2
- secondary=$1
- elif [ "$class_id2" = "$netvsc_cls" ]
- then
- primary=$1
- secondary=$2
- else
- return 0
- fi
-
- echo $'\nBond name:' $bondname
-
- echo configuring $primary
- create_eth_cfg_pri_$distro $primary $bondname
-
- echo configuring $secondary
- create_eth_cfg_$distro $secondary $bondname
-
- echo creating: $bondname with primary slave: $primary
- create_bond_cfg_$distro $bondname $primary $secondary
-
- let bondcnt=bondcnt+1
-}
-
-for (( i=0; i < $eth_cnt-1; i++ ))
-do
-        if [ -n "${list_match[$i]}" ]
-        then
- create_bond ${list_eth[$i]} ${list_match[$i]}
-        fi
-done
--
2.7.4


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

APPLIED: [azure][PATCH v1 0/9] [Hyper-V] Transparent SR-IOV solves bonding race conditions

Marcelo Henrique Cerri
In reply to this post by Marcelo Henrique Cerri
Acked-by: Marcelo Henrique Cerri <[hidden email]>

--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Loading...