Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
David S. Miller [Wed, 27 Mar 2013 17:52:49 +0000 (13:52 -0400)]
Conflicts:
include/net/ipip.h

The changes made to ipip.h in 'net' were already included
in 'net-next' before that header was moved to another location.

Signed-off-by: David S. Miller <davem@davemloft.net>

MAINTAINERS
drivers/infiniband/hw/cxgb4/qp.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
net/bridge/br_fdb.c
net/core/dev.c
net/ipv4/tcp_input.c
net/ipv6/addrconf.c

diff --combined MAINTAINERS
@@@ -1461,6 -1461,12 +1461,12 @@@ F:    drivers/dma/at_hdmac.
  F:    drivers/dma/at_hdmac_regs.h
  F:    include/linux/platform_data/dma-atmel.h
  
+ ATMEL I2C DRIVER
+ M:    Ludovic Desroches <ludovic.desroches@atmel.com>
+ L:    linux-i2c@vger.kernel.org
+ S:    Supported
+ F:    drivers/i2c/busses/i2c-at91.c
+
  ATMEL ISI DRIVER
  M:    Josh Wu <josh.wu@atmel.com>
  L:    linux-media@vger.kernel.org
@@@ -2623,7 -2629,7 +2629,7 @@@ F:      include/uapi/drm
  
  INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets)
  M:    Daniel Vetter <daniel.vetter@ffwll.ch>
- L:    intel-gfx@lists.freedesktop.org (subscribers-only)
+ L:    intel-gfx@lists.freedesktop.org
  L:    dri-devel@lists.freedesktop.org
  T:    git git://people.freedesktop.org/~danvet/drm-intel
  S:    Supported
@@@ -5641,6 -5647,14 +5647,14 @@@ S:    Maintaine
  F:    drivers/video/riva/
  F:    drivers/video/nvidia/
  
+ NVM EXPRESS DRIVER
+ M:    Matthew Wilcox <willy@linux.intel.com>
+ L:    linux-nvme@lists.infradead.org
+ T:    git git://git.infradead.org/users/willy/linux-nvme.git
+ S:    Supported
+ F:    drivers/block/nvme.c
+ F:    include/linux/nvme.h
+
  OMAP SUPPORT
  M:    Tony Lindgren <tony@atomide.com>
  L:    linux-omap@vger.kernel.org
@@@ -5669,7 -5683,7 +5683,7 @@@ S:      Maintaine
  F:    arch/arm/*omap*/*clock*
  
  OMAP POWER MANAGEMENT SUPPORT
- M:    Kevin Hilman <khilman@ti.com>
+ M:    Kevin Hilman <khilman@deeprootsystems.com>
  L:    linux-omap@vger.kernel.org
  S:    Maintained
  F:    arch/arm/*omap*/*pm*
@@@ -5763,7 -5777,7 +5777,7 @@@ F:      arch/arm/*omap*/usb
  
  OMAP GPIO DRIVER
  M:    Santosh Shilimkar <santosh.shilimkar@ti.com>
- M:    Kevin Hilman <khilman@ti.com>
+ M:    Kevin Hilman <khilman@deeprootsystems.com>
  L:    linux-omap@vger.kernel.org
  S:    Maintained
  F:    drivers/gpio/gpio-omap.c
@@@ -6310,7 -6324,6 +6324,7 @@@ F:      drivers/acpi/apei/erst.
  
  PTP HARDWARE CLOCK SUPPORT
  M:    Richard Cochran <richardcochran@gmail.com>
 +L:    netdev@vger.kernel.org
  S:    Maintained
  W:    http://linuxptp.sourceforge.net/
  F:    Documentation/ABI/testing/sysfs-ptp
@@@ -6442,7 -6455,6 +6456,7 @@@ S:      Supporte
  F:    drivers/net/ethernet/qlogic/qlcnic/
  
  QLOGIC QLGE 10Gb ETHERNET DRIVER
 +M:    Shahed Shaikh <shahed.shaikh@qlogic.com>
  M:    Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
  M:    Ron Mercer <ron.mercer@qlogic.com>
  M:    linux-driver@qlogic.com
@@@ -7161,7 -7173,7 +7175,7 @@@ F:      arch/arm/mach-s3c2410/bast-irq.
  
  TI DAVINCI MACHINE SUPPORT
  M:    Sekhar Nori <nsekhar@ti.com>
- M:    Kevin Hilman <khilman@ti.com>
+ M:    Kevin Hilman <khilman@deeprootsystems.com>
  L:    davinci-linux-open-source@linux.davincidsp.com (moderated for non-subscribers)
  T:    git git://gitorious.org/linux-davinci/linux-davinci.git
  Q:    http://patchwork.kernel.org/project/linux-davinci/list/
@@@ -8502,7 -8514,7 +8516,7 @@@ F:      drivers/usb/gadget/*uvc*.
  F:    drivers/usb/gadget/webcam.c
  
  USB WIRELESS RNDIS DRIVER (rndis_wlan)
 -M:    Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 +M:    Jussi Kivilinna <jussi.kivilinna@iki.fi>
  L:    linux-wireless@vger.kernel.org
  S:    Maintained
  F:    drivers/net/wireless/rndis_wlan.c
diff --combined drivers/infiniband/hw/cxgb4/qp.c
@@@ -42,21 -42,10 +42,21 @@@ static int ocqp_support = 1
  module_param(ocqp_support, int, 0644);
  MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
  
 -int db_fc_threshold = 2000;
 +int db_fc_threshold = 1000;
  module_param(db_fc_threshold, int, 0644);
 -MODULE_PARM_DESC(db_fc_threshold, "QP count/threshold that triggers automatic "
 -               "db flow control mode (default = 2000)");
 +MODULE_PARM_DESC(db_fc_threshold,
 +               "QP count/threshold that triggers"
 +               " automatic db flow control mode (default = 1000)");
 +
 +int db_coalescing_threshold;
 +module_param(db_coalescing_threshold, int, 0644);
 +MODULE_PARM_DESC(db_coalescing_threshold,
 +               "QP count/threshold that triggers"
 +               " disabling db coalescing (default = 0)");
 +
 +static int max_fr_immd = T4_MAX_FR_IMMD;
 +module_param(max_fr_immd, int, 0644);
 +      MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immediate");
  
  static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
  {
@@@ -87,7 -76,7 +87,7 @@@ static void dealloc_sq(struct c4iw_rde
  
  static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
  {
 -      if (!ocqp_support || !t4_ocqp_supported())
 +      if (!ocqp_support || !ocqp_supported(&rdev->lldi))
                return -ENOSYS;
        sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize);
        if (!sq->dma_addr)
@@@ -140,7 -129,7 +140,7 @@@ static int create_qp(struct c4iw_rdev *
        int wr_len;
        struct c4iw_wr_wait wr_wait;
        struct sk_buff *skb;
 -      int ret;
 +      int ret = 0;
        int eqsize;
  
        wq->sq.qid = c4iw_get_qpid(rdev, uctx);
        }
  
        if (user) {
 -              ret = alloc_oc_sq(rdev, &wq->sq);
 -              if (ret)
 +              if (alloc_oc_sq(rdev, &wq->sq) && alloc_host_sq(rdev, &wq->sq))
                        goto free_hwaddr;
 -
 -              ret = alloc_host_sq(rdev, &wq->sq);
 -              if (ret)
 -                      goto free_sq;
 -      } else
 +      } else {
                ret = alloc_host_sq(rdev, &wq->sq);
                if (ret)
                        goto free_hwaddr;
 +      }
 +
        memset(wq->sq.queue, 0, wq->sq.memsize);
        dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
  
        wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev),
                                          wq->rq.memsize, &(wq->rq.dma_addr),
                                          GFP_KERNEL);
-       if (!wq->rq.queue)
+       if (!wq->rq.queue) {
+               ret = -ENOMEM;
                goto free_sq;
+       }
        PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
                __func__, wq->sq.queue,
                (unsigned long long)virt_to_phys(wq->sq.queue),
@@@ -540,7 -534,7 +542,7 @@@ static int build_rdma_recv(struct c4iw_
  }
  
  static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
 -                       struct ib_send_wr *wr, u8 *len16)
 +                       struct ib_send_wr *wr, u8 *len16, u8 t5dev)
  {
  
        struct fw_ri_immd *imdp;
        wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
        wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start &
                                        0xffffffff);
 -      WARN_ON(pbllen > T4_MAX_FR_IMMD);
 -      imdp = (struct fw_ri_immd *)(&wqe->fr + 1);
 -      imdp->op = FW_RI_DATA_IMMD;
 -      imdp->r1 = 0;
 -      imdp->r2 = 0;
 -      imdp->immdlen = cpu_to_be32(pbllen);
 -      p = (__be64 *)(imdp + 1);
 -      rem = pbllen;
 -      for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
 -              *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]);
 -              rem -= sizeof *p;
 -              if (++p == (__be64 *)&sq->queue[sq->size])
 -                      p = (__be64 *)sq->queue;
 -      }
 -      BUG_ON(rem < 0);
 -      while (rem) {
 -              *p = 0;
 -              rem -= sizeof *p;
 -              if (++p == (__be64 *)&sq->queue[sq->size])
 -                      p = (__be64 *)sq->queue;
 +
 +      if (t5dev && use_dsgl && (pbllen > max_fr_immd)) {
 +              struct c4iw_fr_page_list *c4pl =
 +                      to_c4iw_fr_page_list(wr->wr.fast_reg.page_list);
 +              struct fw_ri_dsgl *sglp;
 +
 +              for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
 +                      wr->wr.fast_reg.page_list->page_list[i] = (__force u64)
 +                              cpu_to_be64((u64)
 +                              wr->wr.fast_reg.page_list->page_list[i]);
 +              }
 +
 +              sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1);
 +              sglp->op = FW_RI_DATA_DSGL;
 +              sglp->r1 = 0;
 +              sglp->nsge = cpu_to_be16(1);
 +              sglp->addr0 = cpu_to_be64(c4pl->dma_addr);
 +              sglp->len0 = cpu_to_be32(pbllen);
 +
 +              *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16);
 +      } else {
 +              imdp = (struct fw_ri_immd *)(&wqe->fr + 1);
 +              imdp->op = FW_RI_DATA_IMMD;
 +              imdp->r1 = 0;
 +              imdp->r2 = 0;
 +              imdp->immdlen = cpu_to_be32(pbllen);
 +              p = (__be64 *)(imdp + 1);
 +              rem = pbllen;
 +              for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
 +                      *p = cpu_to_be64(
 +                              (u64)wr->wr.fast_reg.page_list->page_list[i]);
 +                      rem -= sizeof(*p);
 +                      if (++p == (__be64 *)&sq->queue[sq->size])
 +                              p = (__be64 *)sq->queue;
 +              }
 +              BUG_ON(rem < 0);
 +              while (rem) {
 +                      *p = 0;
 +                      rem -= sizeof(*p);
 +                      if (++p == (__be64 *)&sq->queue[sq->size])
 +                              p = (__be64 *)sq->queue;
 +              }
 +              *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*imdp)
 +                                    + pbllen, 16);
        }
 -      *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen, 16);
        return 0;
  }
  
@@@ -707,10 -678,7 +709,10 @@@ int c4iw_post_send(struct ib_qp *ibqp, 
                case IB_WR_FAST_REG_MR:
                        fw_opcode = FW_RI_FR_NSMR_WR;
                        swsqe->opcode = FW_RI_FAST_REGISTER;
 -                      err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16);
 +                      err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16,
 +                                          is_t5(
 +                                          qhp->rhp->rdev.lldi.adapter_type) ?
 +                                          1 : 0);
                        break;
                case IB_WR_LOCAL_INV:
                        if (wr->send_flags & IB_SEND_FENCE)
@@@ -1482,9 -1450,6 +1484,9 @@@ int c4iw_destroy_qp(struct ib_qp *ib_qp
                rhp->db_state = NORMAL;
                idr_for_each(&rhp->qpidr, enable_qp_db, NULL);
        }
 +      if (db_coalescing_threshold >= 0)
 +              if (rhp->qpcnt <= db_coalescing_threshold)
 +                      cxgb4_enable_db_coalescing(rhp->rdev.lldi.ports[0]);
        spin_unlock_irq(&rhp->lock);
        atomic_dec(&qhp->refcnt);
        wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
@@@ -1596,15 -1561,11 +1598,15 @@@ struct ib_qp *c4iw_create_qp(struct ib_
        spin_lock_irq(&rhp->lock);
        if (rhp->db_state != NORMAL)
                t4_disable_wq_db(&qhp->wq);
 -      if (++rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) {
 +      rhp->qpcnt++;
 +      if (rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) {
                rhp->rdev.stats.db_state_transitions++;
                rhp->db_state = FLOW_CONTROL;
                idr_for_each(&rhp->qpidr, disable_qp_db, NULL);
        }
 +      if (db_coalescing_threshold >= 0)
 +              if (rhp->qpcnt > db_coalescing_threshold)
 +                      cxgb4_disable_db_coalescing(rhp->rdev.lldi.ports[0]);
        ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
        spin_unlock_irq(&rhp->lock);
        if (ret)
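
Read as an example of the new knobs above (assuming rhp->qpcnt is decremented earlier in the destroy path, which is not visible in these hunks): with the default db_coalescing_threshold of 0, doorbell coalescing is disabled as soon as the first QP is created (qpcnt becomes 1 > 0) and re-enabled only once the last QP is gone, while a negative threshold skips both cxgb4_enable_db_coalescing() and cxgb4_disable_db_coalescing() calls entirely.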
diff --combined drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@@ -76,9 -76,12 +76,9 @@@ static const struct ixgbevf_info *ixgbe
   * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
   *   Class, Class Mask, private data (not used) }
   */
 -static struct pci_device_id ixgbevf_pci_tbl[] = {
 -      {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_VF),
 -      board_82599_vf},
 -      {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540_VF),
 -      board_X540_vf},
 -
 +static DEFINE_PCI_DEVICE_TABLE(ixgbevf_pci_tbl) = {
 +      {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_VF), board_82599_vf },
 +      {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540_VF), board_X540_vf },
        /* required last entry */
        {0, }
  };
@@@ -187,37 -190,28 +187,37 @@@ static bool ixgbevf_clean_tx_irq(struc
        struct ixgbevf_adapter *adapter = q_vector->adapter;
        union ixgbe_adv_tx_desc *tx_desc, *eop_desc;
        struct ixgbevf_tx_buffer *tx_buffer_info;
 -      unsigned int i, eop, count = 0;
 +      unsigned int i, count = 0;
        unsigned int total_bytes = 0, total_packets = 0;
  
        if (test_bit(__IXGBEVF_DOWN, &adapter->state))
                return true;
  
        i = tx_ring->next_to_clean;
 -      eop = tx_ring->tx_buffer_info[i].next_to_watch;
 -      eop_desc = IXGBEVF_TX_DESC(tx_ring, eop);
 +      tx_buffer_info = &tx_ring->tx_buffer_info[i];
 +      eop_desc = tx_buffer_info->next_to_watch;
  
 -      while ((eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)) &&
 -             (count < tx_ring->count)) {
 +      do {
                bool cleaned = false;
 -              rmb(); /* read buffer_info after eop_desc */
 -              /* eop could change between read and DD-check */
 -              if (unlikely(eop != tx_ring->tx_buffer_info[i].next_to_watch))
 -                      goto cont_loop;
 +
 +              /* if next_to_watch is not set then there is no work pending */
 +              if (!eop_desc)
 +                      break;
 +
 +              /* prevent any other reads prior to eop_desc */
 +              read_barrier_depends();
 +
 +              /* if DD is not set pending work has not been completed */
 +              if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
 +                      break;
 +
 +              /* clear next_to_watch to prevent false hangs */
 +              tx_buffer_info->next_to_watch = NULL;
 +
                for ( ; !cleaned; count++) {
                        struct sk_buff *skb;
                        tx_desc = IXGBEVF_TX_DESC(tx_ring, i);
 -                      tx_buffer_info = &tx_ring->tx_buffer_info[i];
 -                      cleaned = (i == eop);
 +                      cleaned = (tx_desc == eop_desc);
                        skb = tx_buffer_info->skb;
  
                        if (cleaned && skb) {
                        i++;
                        if (i == tx_ring->count)
                                i = 0;
 +
 +                      tx_buffer_info = &tx_ring->tx_buffer_info[i];
                }
  
 -cont_loop:
 -              eop = tx_ring->tx_buffer_info[i].next_to_watch;
 -              eop_desc = IXGBEVF_TX_DESC(tx_ring, eop);
 -      }
 +              eop_desc = tx_buffer_info->next_to_watch;
 +      } while (count < tx_ring->count);
  
        tx_ring->next_to_clean = i;
  
@@@ -950,9 -944,17 +950,17 @@@ free_queue_irqs
                free_irq(adapter->msix_entries[vector].vector,
                         adapter->q_vector[vector]);
        }
-       pci_disable_msix(adapter->pdev);
-       kfree(adapter->msix_entries);
-       adapter->msix_entries = NULL;
+       /* This failure is non-recoverable - it indicates the system is
+        * out of MSIX vector resources and the VF driver cannot run
+        * without them.  Set the number of msix vectors to zero
+        * indicating that not enough can be allocated.  The error
+        * will be returned to the user indicating device open failed.
+        * Any further attempts to force the driver to open will also
+        * fail.  The only way to recover is to unload the driver and
+        * reload it again.  If the system has recovered some MSIX
+        * vectors then it may succeed.
+        */
+       adapter->num_msix_vectors = 0;
        return err;
  }
  
@@@ -2423,6 -2425,9 +2431,6 @@@ int ixgbevf_setup_rx_resources(struct i
                                           &rx_ring->dma, GFP_KERNEL);
  
        if (!rx_ring->desc) {
 -              hw_dbg(&adapter->hw,
 -                     "Unable to allocate memory for "
 -                     "the receive descriptor ring\n");
                vfree(rx_ring->rx_buffer_info);
                rx_ring->rx_buffer_info = NULL;
                goto alloc_failed;
@@@ -2575,6 -2580,15 +2583,15 @@@ static int ixgbevf_open(struct net_devi
        struct ixgbe_hw *hw = &adapter->hw;
        int err;
  
+       /* A previous failure to open the device because of a lack of
+        * available MSIX vector resources may have reset the number
+        * of msix vectors variable to zero.  The only way to recover
+        * is to unload/reload the driver and hope that the system has
+        * been able to recover some MSIX vector resources.
+        */
+       if (!adapter->num_msix_vectors)
+               return -ENOMEM;
        /* disallow open during test */
        if (test_bit(__IXGBEVF_TESTING, &adapter->state))
                return -EBUSY;
  
  err_req_irq:
        ixgbevf_down(adapter);
-       ixgbevf_free_irq(adapter);
  err_setup_rx:
        ixgbevf_free_all_rx_resources(adapter);
  err_setup_tx:
@@@ -2809,7 -2822,8 +2825,7 @@@ static bool ixgbevf_tx_csum(struct ixgb
  }
  
  static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
 -                        struct sk_buff *skb, u32 tx_flags,
 -                        unsigned int first)
 +                        struct sk_buff *skb, u32 tx_flags)
  {
        struct ixgbevf_tx_buffer *tx_buffer_info;
        unsigned int len;
                                                     size, DMA_TO_DEVICE);
                if (dma_mapping_error(tx_ring->dev, tx_buffer_info->dma))
                        goto dma_error;
 -              tx_buffer_info->next_to_watch = i;
  
                len -= size;
                total -= size;
                                              tx_buffer_info->dma))
                                goto dma_error;
                        tx_buffer_info->mapped_as_page = true;
 -                      tx_buffer_info->next_to_watch = i;
  
                        len -= size;
                        total -= size;
        else
                i = i - 1;
        tx_ring->tx_buffer_info[i].skb = skb;
 -      tx_ring->tx_buffer_info[first].next_to_watch = i;
 -      tx_ring->tx_buffer_info[first].time_stamp = jiffies;
  
        return count;
  
@@@ -2889,6 -2907,7 +2905,6 @@@ dma_error
  
        /* clear timestamp and dma mappings for failed tx_buffer_info map */
        tx_buffer_info->dma = 0;
 -      tx_buffer_info->next_to_watch = 0;
        count--;
  
        /* clear timestamp and dma mappings for remaining portion of packet */
  }
  
  static void ixgbevf_tx_queue(struct ixgbevf_ring *tx_ring, int tx_flags,
 -                           int count, u32 paylen, u8 hdr_len)
 +                           int count, unsigned int first, u32 paylen,
 +                           u8 hdr_len)
  {
        union ixgbe_adv_tx_desc *tx_desc = NULL;
        struct ixgbevf_tx_buffer *tx_buffer_info;
  
        tx_desc->read.cmd_type_len |= cpu_to_le32(txd_cmd);
  
 +      tx_ring->tx_buffer_info[first].time_stamp = jiffies;
 +
 +      /* Force memory writes to complete before letting h/w
 +       * know there are new descriptors to fetch.  (Only
 +       * applicable for weak-ordered memory model archs,
 +       * such as IA-64).
 +       */
 +      wmb();
 +
 +      tx_ring->tx_buffer_info[first].next_to_watch = tx_desc;
        tx_ring->next_to_use = i;
  }
  
@@@ -3058,8 -3066,15 +3074,8 @@@ static int ixgbevf_xmit_frame(struct sk
                tx_flags |= IXGBE_TX_FLAGS_CSUM;
  
        ixgbevf_tx_queue(tx_ring, tx_flags,
 -                       ixgbevf_tx_map(tx_ring, skb, tx_flags, first),
 -                       skb->len, hdr_len);
 -      /*
 -       * Force memory writes to complete before letting h/w
 -       * know there are new descriptors to fetch.  (Only
 -       * applicable for weak-ordered memory model archs,
 -       * such as IA-64).
 -       */
 -      wmb();
 +                       ixgbevf_tx_map(tx_ring, skb, tx_flags),
 +                       first, skb->len, hdr_len);
  
        writel(tx_ring->next_to_use, adapter->hw.hw_addr + tx_ring->tail);
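
The two paths above form a publish/consume pair: ixgbevf_tx_queue() fills the
descriptors, issues wmb(), and only then stores next_to_watch, while
ixgbevf_clean_tx_irq() loads next_to_watch, calls read_barrier_depends(), and
only then tests the DD bit. A minimal userspace C11 sketch of the same ordering
pattern (tx_desc, publish and consume are hypothetical names, and
memory_order_acquire stands in for the dependency barrier):

#include <stdatomic.h>

struct tx_desc {
	int dd;		/* "descriptor done"; set by hardware in the real driver */
	int payload;
};

/* Producer: fill the descriptor first, then publish the pointer with
 * release semantics (the counterpart of wmb() before next_to_watch). */
static void publish(_Atomic(struct tx_desc *) *next_to_watch,
		    struct tx_desc *d, int data)
{
	d->payload = data;
	d->dd = 1;
	atomic_store_explicit(next_to_watch, d, memory_order_release);
}

/* Consumer: load the pointer with acquire semantics (the counterpart of
 * read_barrier_depends()), then it is safe to inspect the descriptor. */
static int consume(_Atomic(struct tx_desc *) *next_to_watch)
{
	struct tx_desc *d = atomic_load_explicit(next_to_watch,
						 memory_order_acquire);

	if (!d)			/* nothing published: no work pending */
		return 0;
	if (!d->dd)		/* published but not yet completed */
		return 0;
	return d->payload;
}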
  
diff --combined drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@@ -1469,11 -1469,13 +1469,11 @@@ pch_gbe_alloc_rx_buffers_pool(struct pc
  
        size = rx_ring->count * bufsz + PCH_GBE_RESERVE_MEMORY;
        rx_ring->rx_buff_pool = dma_alloc_coherent(&pdev->dev, size,
 -                                              &rx_ring->rx_buff_pool_logic,
 -                                              GFP_KERNEL);
 -      if (!rx_ring->rx_buff_pool) {
 -              pr_err("Unable to allocate memory for the receive pool buffer\n");
 +                                                 &rx_ring->rx_buff_pool_logic,
 +                                                 GFP_KERNEL | __GFP_ZERO);
 +      if (!rx_ring->rx_buff_pool)
                return -ENOMEM;
 -      }
 -      memset(rx_ring->rx_buff_pool, 0, size);
 +
        rx_ring->rx_buff_pool_size = size;
        for (i = 0; i < rx_ring->count; i++) {
                buffer_info = &rx_ring->buffer_info[i];
@@@ -1724,9 -1726,9 +1724,9 @@@ pch_gbe_clean_rx(struct pch_gbe_adapte
  
                        skb->protocol = eth_type_trans(skb, netdev);
                        if (tcp_ip_status & PCH_GBE_RXD_ACC_STAT_TCPIPOK)
-                               skb->ip_summed = CHECKSUM_NONE;
-                       else
                                skb->ip_summed = CHECKSUM_UNNECESSARY;
+                       else
+                               skb->ip_summed = CHECKSUM_NONE;
  
                        napi_gro_receive(&adapter->napi, skb);
                        (*work_done)++;
@@@ -1772,12 -1774,13 +1772,12 @@@ int pch_gbe_setup_tx_resources(struct p
        tx_ring->size = tx_ring->count * (int)sizeof(struct pch_gbe_tx_desc);
  
        tx_ring->desc = dma_alloc_coherent(&pdev->dev, tx_ring->size,
 -                                         &tx_ring->dma, GFP_KERNEL);
 +                                         &tx_ring->dma,
 +                                         GFP_KERNEL | __GFP_ZERO);
        if (!tx_ring->desc) {
                vfree(tx_ring->buffer_info);
 -              pr_err("Unable to allocate memory for the transmit descriptor ring\n");
                return -ENOMEM;
        }
 -      memset(tx_ring->desc, 0, tx_ring->size);
  
        tx_ring->next_to_use = 0;
        tx_ring->next_to_clean = 0;
@@@ -1817,12 -1820,14 +1817,12 @@@ int pch_gbe_setup_rx_resources(struct p
  
        rx_ring->size = rx_ring->count * (int)sizeof(struct pch_gbe_rx_desc);
        rx_ring->desc = dma_alloc_coherent(&pdev->dev, rx_ring->size,
 -                                         &rx_ring->dma, GFP_KERNEL);
 -
 +                                         &rx_ring->dma,
 +                                         GFP_KERNEL | __GFP_ZERO);
        if (!rx_ring->desc) {
                vfree(rx_ring->buffer_info);
                return -ENOMEM;
        }
 -      memset(rx_ring->desc, 0, rx_ring->size);
        rx_ring->next_to_clean = 0;
        rx_ring->next_to_use = 0;
        for (desNo = 0; desNo < rx_ring->count; desNo++) {
diff --combined net/bridge/br_fdb.c
@@@ -161,7 -161,9 +161,7 @@@ void br_fdb_change_mac_address(struct n
        if (!pv)
                return;
  
 -      for (vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid);
 -           vid < BR_VLAN_BITMAP_LEN;
 -           vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) {
 +      for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
                f = __br_fdb_get(br, br->dev->dev_addr, vid);
                if (f && f->is_local && !f->dst)
                        fdb_delete(br, f);
@@@ -421,7 -423,7 +421,7 @@@ static int fdb_insert(struct net_bridg
                        return 0;
                br_warn(br, "adding interface %s with same address "
                       "as a received packet\n",
-                      source->dev->name);
+                      source ? source->dev->name : br->dev->name);
                fdb_delete(br, fdb);
        }
  
@@@ -722,10 -724,13 +722,10 @@@ int br_fdb_add(struct ndmsg *ndm, struc
                 * specify a VLAN.  To be nice, add/update entry for every
                 * vlan on this port.
                 */
 -              vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
 -              while (vid < BR_VLAN_BITMAP_LEN) {
 +              for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
                        err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
                        if (err)
                                goto out;
 -                      vid = find_next_bit(pv->vlan_bitmap,
 -                                          BR_VLAN_BITMAP_LEN, vid+1);
                }
        }
  
@@@ -810,8 -815,11 +810,8 @@@ int br_fdb_delete(struct ndmsg *ndm, st
                 * vlan on this port.
                 */
                err = -ENOENT;
 -              vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
 -              while (vid < BR_VLAN_BITMAP_LEN) {
 +              for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
                        err &= __br_fdb_delete(p, addr, vid);
 -                      vid = find_next_bit(pv->vlan_bitmap,
 -                                          BR_VLAN_BITMAP_LEN, vid+1);
                }
        }
  out:
diff --combined net/core/dev.c
@@@ -1545,7 -1545,6 +1545,6 @@@ void net_enable_timestamp(void
                return;
        }
  #endif
-       WARN_ON(in_interrupt());
        static_key_slow_inc(&netstamp_needed);
  }
  EXPORT_SYMBOL(net_enable_timestamp);
@@@ -2208,8 -2207,16 +2207,8 @@@ out
  }
  EXPORT_SYMBOL(skb_checksum_help);
  
 -/**
 - *    skb_mac_gso_segment - mac layer segmentation handler.
 - *    @skb: buffer to segment
 - *    @features: features for the output path (see dev->features)
 - */
 -struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 -                                  netdev_features_t features)
 +__be16 skb_network_protocol(struct sk_buff *skb)
  {
 -      struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 -      struct packet_offload *ptype;
        __be16 type = skb->protocol;
        int vlan_depth = ETH_HLEN;
  
                struct vlan_hdr *vh;
  
                if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
 -                      return ERR_PTR(-EINVAL);
 +                      return 0;
  
                vh = (struct vlan_hdr *)(skb->data + vlan_depth);
                type = vh->h_vlan_encapsulated_proto;
                vlan_depth += VLAN_HLEN;
        }
  
 +      return type;
 +}
 +
 +/**
 + *    skb_mac_gso_segment - mac layer segmentation handler.
 + *    @skb: buffer to segment
 + *    @features: features for the output path (see dev->features)
 + */
 +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 +                                  netdev_features_t features)
 +{
 +      struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 +      struct packet_offload *ptype;
 +      __be16 type = skb_network_protocol(skb);
 +
 +      if (unlikely(!type))
 +              return ERR_PTR(-EINVAL);
 +
        __skb_pull(skb, skb->mac_len);
  
        rcu_read_lock();
@@@ -2408,12 -2397,24 +2407,12 @@@ static int dev_gso_segment(struct sk_bu
        return 0;
  }
  
 -static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
 -{
 -      return ((features & NETIF_F_GEN_CSUM) ||
 -              ((features & NETIF_F_V4_CSUM) &&
 -               protocol == htons(ETH_P_IP)) ||
 -              ((features & NETIF_F_V6_CSUM) &&
 -               protocol == htons(ETH_P_IPV6)) ||
 -              ((features & NETIF_F_FCOE_CRC) &&
 -               protocol == htons(ETH_P_FCOE)));
 -}
 -
  static netdev_features_t harmonize_features(struct sk_buff *skb,
        __be16 protocol, netdev_features_t features)
  {
        if (skb->ip_summed != CHECKSUM_NONE &&
            !can_checksum_protocol(features, protocol)) {
                features &= ~NETIF_F_ALL_CSUM;
 -              features &= ~NETIF_F_SG;
        } else if (illegal_highdma(skb->dev, skb)) {
                features &= ~NETIF_F_SG;
        }
@@@ -2588,7 -2589,6 +2587,7 @@@ static void qdisc_pkt_len_init(struct s
         */
        if (shinfo->gso_size)  {
                unsigned int hdr_len;
 +              u16 gso_segs = shinfo->gso_segs;
  
                /* mac layer + network layer */
                hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
                        hdr_len += tcp_hdrlen(skb);
                else
                        hdr_len += sizeof(struct udphdr);
 -              qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len;
 +
 +              if (shinfo->gso_type & SKB_GSO_DODGY)
 +                      gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
 +                                              shinfo->gso_size);
 +
 +              qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
        }
  }
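
As a worked example with made-up numbers: for a SKB_GSO_DODGY skb with
skb->len = 30066, hdr_len = 66 and gso_size = 1448, gso_segs is recomputed as
DIV_ROUND_UP(30000, 1448) = 21, so qdisc_skb_cb(skb)->pkt_len grows by
(21 - 1) * 66 = 1320 bytes to account for the headers the device will replicate
when it segments the packet.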
  
@@@ -3330,7 -3325,7 +3329,7 @@@ EXPORT_SYMBOL_GPL(netdev_rx_handler_reg
   *    netdev_rx_handler_unregister - unregister receive handler
   *    @dev: device to unregister a handler from
   *
 - *    Unregister a receive hander from a device.
 + *    Unregister a receive handler from a device.
   *
   *    The caller must hold the rtnl_mutex.
   */
@@@ -4062,9 -4057,6 +4061,9 @@@ void netif_napi_add(struct net_device *
        napi->gro_list = NULL;
        napi->skb = NULL;
        napi->poll = poll;
 +      if (weight > NAPI_POLL_WEIGHT)
 +              pr_err_once("netif_napi_add() called with weight %d on device %s\n",
 +                          weight, dev->name);
        napi->weight = weight;
        list_add(&napi->dev_list, &dev->napi_list);
        napi->dev = dev;
@@@ -4926,25 -4918,20 +4925,25 @@@ static netdev_features_t netdev_fix_fea
                features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
        }
  
 -      /* Fix illegal SG+CSUM combinations. */
 -      if ((features & NETIF_F_SG) &&
 -          !(features & NETIF_F_ALL_CSUM)) {
 -              netdev_dbg(dev,
 -                      "Dropping NETIF_F_SG since no checksum feature.\n");
 -              features &= ~NETIF_F_SG;
 -      }
 -
        /* TSO requires that SG is present as well. */
        if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
                netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
                features &= ~NETIF_F_ALL_TSO;
        }
  
 +      if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
 +                                      !(features & NETIF_F_IP_CSUM)) {
 +              netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
 +              features &= ~NETIF_F_TSO;
 +              features &= ~NETIF_F_TSO_ECN;
 +      }
 +
 +      if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
 +                                       !(features & NETIF_F_IPV6_CSUM)) {
 +              netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
 +              features &= ~NETIF_F_TSO6;
 +      }
 +
        /* TSO ECN requires that TSO is present as well. */
        if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
                features &= ~NETIF_F_TSO_ECN;
@@@ -5212,10 -5199,6 +5211,10 @@@ int register_netdevice(struct net_devic
         */
        dev->vlan_features |= NETIF_F_HIGHDMA;
  
 +      /* Make NETIF_F_SG inheritable to tunnel devices.
 +       */
 +      dev->hw_enc_features |= NETIF_F_SG;
 +
        ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
        ret = notifier_to_errno(ret);
        if (ret)
diff --combined net/ipv4/tcp_input.c
@@@ -93,11 -93,12 +93,11 @@@ int sysctl_tcp_stdurg __read_mostly
  int sysctl_tcp_rfc1337 __read_mostly;
  int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
  int sysctl_tcp_frto __read_mostly = 2;
 -int sysctl_tcp_frto_response __read_mostly;
  
  int sysctl_tcp_thin_dupack __read_mostly;
  
  int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
 -int sysctl_tcp_early_retrans __read_mostly = 2;
 +int sysctl_tcp_early_retrans __read_mostly = 3;
  
  #define FLAG_DATA             0x01 /* Incoming frame contained data.          */
  #define FLAG_WIN_UPDATE               0x02 /* Incoming ACK was a window update.       */
  #define FLAG_DATA_SACKED      0x20 /* New SACK.                               */
  #define FLAG_ECE              0x40 /* ECE in this ACK                         */
  #define FLAG_SLOWPATH         0x100 /* Do not skip RFC checks for window update.*/
 -#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */
 +#define FLAG_ORIG_SACK_ACKED  0x200 /* Never retransmitted data are (s)acked  */
  #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
  #define FLAG_DSACKING_ACK     0x800 /* SACK blocks contained D-SACK info */
 -#define FLAG_NONHEAD_RETRANS_ACKED    0x1000 /* Non-head rexmitted data was ACKed */
  #define FLAG_SACK_RENEGING    0x2000 /* snd_una advanced to a sacked seq */
  
  #define FLAG_ACKED            (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
  #define FLAG_NOT_DUP          (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
  #define FLAG_CA_ALERT         (FLAG_DATA_SACKED|FLAG_ECE)
  #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
 -#define FLAG_ANY_PROGRESS     (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)
  
  #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
  #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
@@@ -1156,8 -1159,10 +1156,8 @@@ static u8 tcp_sacktag_one(struct sock *
                                           tcp_highest_sack_seq(tp)))
                                        state->reord = min(fack_count,
                                                           state->reord);
 -
 -                              /* SACK enhanced F-RTO (RFC4138; Appendix B) */
 -                              if (!after(end_seq, tp->frto_highmark))
 -                                      state->flag |= FLAG_ONLY_ORIG_SACKED;
 +                              if (!after(end_seq, tp->high_seq))
 +                                      state->flag |= FLAG_ORIG_SACK_ACKED;
                        }
  
                        if (sacked & TCPCB_LOST) {
@@@ -1550,6 -1555,7 +1550,6 @@@ static in
  tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
                        u32 prior_snd_una)
  {
 -      const struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        const unsigned char *ptr = (skb_transport_header(ack_skb) +
                                    TCP_SKB_CB(ack_skb)->sacked);
@@@ -1722,6 -1728,12 +1722,6 @@@ walk
                                       start_seq, end_seq, dup_sack);
  
  advance_sp:
 -              /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
 -               * due to in-order walk
 -               */
 -              if (after(end_seq, tp->frto_highmark))
 -                      state.flag &= ~FLAG_ONLY_ORIG_SACKED;
 -
                i++;
        }
  
        tcp_verify_left_out(tp);
  
        if ((state.reord < tp->fackets_out) &&
 -          ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
 -          (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
 +          ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
                tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
  
  out:
@@@ -1812,6 -1825,197 +1812,6 @@@ static inline void tcp_reset_reno_sack(
        tp->sacked_out = 0;
  }
  
 -static int tcp_is_sackfrto(const struct tcp_sock *tp)
 -{
 -      return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
 -}
 -
 -/* F-RTO can only be used if TCP has never retransmitted anything other than
 - * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
 - */
 -bool tcp_use_frto(struct sock *sk)
 -{
 -      const struct tcp_sock *tp = tcp_sk(sk);
 -      const struct inet_connection_sock *icsk = inet_csk(sk);
 -      struct sk_buff *skb;
 -
 -      if (!sysctl_tcp_frto)
 -              return false;
 -
 -      /* MTU probe and F-RTO won't really play nicely along currently */
 -      if (icsk->icsk_mtup.probe_size)
 -              return false;
 -
 -      if (tcp_is_sackfrto(tp))
 -              return true;
 -
 -      /* Avoid expensive walking of rexmit queue if possible */
 -      if (tp->retrans_out > 1)
 -              return false;
 -
 -      skb = tcp_write_queue_head(sk);
 -      if (tcp_skb_is_last(sk, skb))
 -              return true;
 -      skb = tcp_write_queue_next(sk, skb);    /* Skips head */
 -      tcp_for_write_queue_from(skb, sk) {
 -              if (skb == tcp_send_head(sk))
 -                      break;
 -              if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
 -                      return false;
 -              /* Short-circuit when first non-SACKed skb has been checked */
 -              if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 -                      break;
 -      }
 -      return true;
 -}
 -
 -/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
 - * recovery a bit and use heuristics in tcp_process_frto() to detect if
 - * the RTO was spurious. Only clear SACKED_RETRANS of the head here to
 - * keep retrans_out counting accurate (with SACK F-RTO, other than head
 - * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS
 - * bits are handled if the Loss state is really to be entered (in
 - * tcp_enter_frto_loss).
 - *
 - * Do like tcp_enter_loss() would; when RTO expires the second time it
 - * does:
 - *  "Reduce ssthresh if it has not yet been made inside this window."
 - */
 -void tcp_enter_frto(struct sock *sk)
 -{
 -      const struct inet_connection_sock *icsk = inet_csk(sk);
 -      struct tcp_sock *tp = tcp_sk(sk);
 -      struct sk_buff *skb;
 -
 -      if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
 -          tp->snd_una == tp->high_seq ||
 -          ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
 -           !icsk->icsk_retransmits)) {
 -              tp->prior_ssthresh = tcp_current_ssthresh(sk);
 -              /* Our state is too optimistic in ssthresh() call because cwnd
 -               * is not reduced until tcp_enter_frto_loss() when previous F-RTO
 -               * recovery has not yet completed. Pattern would be this: RTO,
 -               * Cumulative ACK, RTO (2xRTO for the same segment does not end
 -               * up here twice).
 -               * RFC4138 should be more specific on what to do, even though
 -               * RTO is quite unlikely to occur after the first Cumulative ACK
 -               * due to back-off and complexity of triggering events ...
 -               */
 -              if (tp->frto_counter) {
 -                      u32 stored_cwnd;
 -                      stored_cwnd = tp->snd_cwnd;
 -                      tp->snd_cwnd = 2;
 -                      tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 -                      tp->snd_cwnd = stored_cwnd;
 -              } else {
 -                      tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 -              }
 -              /* ... in theory, cong.control module could do "any tricks" in
 -               * ssthresh(), which means that ca_state, lost bits and lost_out
 -               * counter would have to be faked before the call occurs. We
 -               * consider that too expensive, unlikely and hacky, so modules
 -               * using these in ssthresh() must deal these incompatibility
 -               * issues if they receives CA_EVENT_FRTO and frto_counter != 0
 -               */
 -              tcp_ca_event(sk, CA_EVENT_FRTO);
 -      }
 -
 -      tp->undo_marker = tp->snd_una;
 -      tp->undo_retrans = 0;
 -
 -      skb = tcp_write_queue_head(sk);
 -      if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
 -              tp->undo_marker = 0;
 -      if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
 -              TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 -              tp->retrans_out -= tcp_skb_pcount(skb);
 -      }
 -      tcp_verify_left_out(tp);
 -
 -      /* Too bad if TCP was application limited */
 -      tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
 -
 -      /* Earlier loss recovery underway (see RFC4138; Appendix B).
 -       * The last condition is necessary at least in tp->frto_counter case.
 -       */
 -      if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
 -          ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
 -          after(tp->high_seq, tp->snd_una)) {
 -              tp->frto_highmark = tp->high_seq;
 -      } else {
 -              tp->frto_highmark = tp->snd_nxt;
 -      }
 -      tcp_set_ca_state(sk, TCP_CA_Disorder);
 -      tp->high_seq = tp->snd_nxt;
 -      tp->frto_counter = 1;
 -}
 -
 -/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
 - * which indicates that we should follow the traditional RTO recovery,
 - * i.e. mark everything lost and do go-back-N retransmission.
 - */
 -static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 -{
 -      struct tcp_sock *tp = tcp_sk(sk);
 -      struct sk_buff *skb;
 -
 -      tp->lost_out = 0;
 -      tp->retrans_out = 0;
 -      if (tcp_is_reno(tp))
 -              tcp_reset_reno_sack(tp);
 -
 -      tcp_for_write_queue(skb, sk) {
 -              if (skb == tcp_send_head(sk))
 -                      break;
 -
 -              TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 -              /*
 -               * Count the retransmission made on RTO correctly (only when
 -               * waiting for the first ACK and did not get it)...
 -               */
 -              if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
 -                      /* For some reason this R-bit might get cleared? */
 -                      if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
 -                              tp->retrans_out += tcp_skb_pcount(skb);
 -                      /* ...enter this if branch just for the first segment */
 -                      flag |= FLAG_DATA_ACKED;
 -              } else {
 -                      if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
 -                              tp->undo_marker = 0;
 -                      TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 -              }
 -
 -              /* Marking forward transmissions that were made after RTO lost
 -               * can cause unnecessary retransmissions in some scenarios,
 -               * SACK blocks will mitigate that in some but not in all cases.
 -               * We used to not mark them but it was causing break-ups with
 -               * receivers that do only in-order receival.
 -               *
 -               * TODO: we could detect presence of such receiver and select
 -               * different behavior per flow.
 -               */
 -              if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
 -                      TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 -                      tp->lost_out += tcp_skb_pcount(skb);
 -                      tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
 -              }
 -      }
 -      tcp_verify_left_out(tp);
 -
 -      tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
 -      tp->snd_cwnd_cnt = 0;
 -      tp->snd_cwnd_stamp = tcp_time_stamp;
 -      tp->frto_counter = 0;
 -
 -      tp->reordering = min_t(unsigned int, tp->reordering,
 -                             sysctl_tcp_reordering);
 -      tcp_set_ca_state(sk, TCP_CA_Loss);
 -      tp->high_seq = tp->snd_nxt;
 -      TCP_ECN_queue_cwr(tp);
 -
 -      tcp_clear_all_retrans_hints(tp);
 -}
 -
  static void tcp_clear_retrans_partial(struct tcp_sock *tp)
  {
        tp->retrans_out = 0;
@@@ -1838,13 -2042,10 +1838,13 @@@ void tcp_enter_loss(struct sock *sk, in
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
 +      bool new_recovery = false;
  
        /* Reduce ssthresh if it has not yet been made inside this window. */
 -      if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
 +      if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
 +          !after(tp->high_seq, tp->snd_una) ||
            (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
 +              new_recovery = true;
                tp->prior_ssthresh = tcp_current_ssthresh(sk);
                tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
                tcp_ca_event(sk, CA_EVENT_LOSS);
        if (tcp_is_reno(tp))
                tcp_reset_reno_sack(tp);
  
-       if (!how) {
-               /* Push undo marker, if it was plain RTO and nothing
-                * was retransmitted. */
-               tp->undo_marker = tp->snd_una;
-       } else {
+       tp->undo_marker = tp->snd_una;
+       if (how) {
                tp->sacked_out = 0;
                tp->fackets_out = 0;
        }
        tcp_set_ca_state(sk, TCP_CA_Loss);
        tp->high_seq = tp->snd_nxt;
        TCP_ECN_queue_cwr(tp);
 -      /* Abort F-RTO algorithm if one is in progress */
 -      tp->frto_counter = 0;
 +
 +      /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
 +       * loss recovery is underway except recurring timeout(s) on
 +       * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
 +       */
 +      tp->frto = sysctl_tcp_frto &&
 +                 (new_recovery || icsk->icsk_retransmits) &&
 +                 !inet_csk(sk)->icsk_mtup.probe_size;
  }
  
  /* If ACK arrived pointing to a remembered SACK, it means that our
@@@ -1955,16 -2147,15 +1952,16 @@@ static bool tcp_pause_early_retransmit(
         * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
         * available, or RTO is scheduled to fire first.
         */
 -      if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt)
 +      if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
 +          (flag & FLAG_ECE) || !tp->srtt)
                return false;
  
        delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
        if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
                return false;
  
 -      inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX);
 -      tp->early_retrans_delayed = 1;
 +      inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
 +                                TCP_RTO_MAX);
        return true;
  }
  
@@@ -2080,6 -2271,10 +2077,6 @@@ static bool tcp_time_to_recover(struct 
        struct tcp_sock *tp = tcp_sk(sk);
        __u32 packets_out;
  
 -      /* Do not perform any recovery during F-RTO algorithm */
 -      if (tp->frto_counter)
 -              return false;
 -
        /* Trick#1: The loss is proven. */
        if (tp->lost_out)
                return true;
         * interval if appropriate.
         */
        if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
 -          (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) &&
 +          (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
            !tcp_may_send_now(sk))
                return !tcp_pause_early_retransmit(sk, flag);
  
@@@ -2440,12 -2635,12 +2437,12 @@@ static int tcp_try_undo_partial(struct 
        return failed;
  }
  
 -/* Undo during loss recovery after partial ACK. */
 -static bool tcp_try_undo_loss(struct sock *sk)
 +/* Undo during loss recovery after partial ACK or using F-RTO. */
 +static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
  {
        struct tcp_sock *tp = tcp_sk(sk);
  
 -      if (tcp_may_undo(tp)) {
 +      if (frto_undo || tcp_may_undo(tp)) {
                struct sk_buff *skb;
                tcp_for_write_queue(skb, sk) {
                        if (skb == tcp_send_head(sk))
                tp->lost_out = 0;
                tcp_undo_cwr(sk, true);
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
 +              if (frto_undo)
 +                      NET_INC_STATS_BH(sock_net(sk),
 +                                       LINUX_MIB_TCPSPURIOUSRTOS);
                inet_csk(sk)->icsk_retransmits = 0;
                tp->undo_marker = 0;
 -              if (tcp_is_sack(tp))
 +              if (frto_undo || tcp_is_sack(tp))
                        tcp_set_ca_state(sk, TCP_CA_Open);
                return true;
        }
@@@ -2486,7 -2678,6 +2483,7 @@@ static void tcp_init_cwnd_reduction(str
        struct tcp_sock *tp = tcp_sk(sk);
  
        tp->high_seq = tp->snd_nxt;
 +      tp->tlp_high_seq = 0;
        tp->snd_cwnd_cnt = 0;
        tp->prior_cwnd = tp->snd_cwnd;
        tp->prr_delivered = 0;
@@@ -2564,7 -2755,7 +2561,7 @@@ static void tcp_try_to_open(struct soc
  
        tcp_verify_left_out(tp);
  
 -      if (!tp->frto_counter && !tcp_any_retrans_done(sk))
 +      if (!tcp_any_retrans_done(sk))
                tp->retrans_stamp = 0;
  
        if (flag & FLAG_ECE)
@@@ -2681,58 -2872,6 +2678,58 @@@ static void tcp_enter_recovery(struct s
        tcp_set_ca_state(sk, TCP_CA_Recovery);
  }
  
 +/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
 + * recovered or spurious. Otherwise retransmits more on partial ACKs.
 + */
 +static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 +{
 +      struct inet_connection_sock *icsk = inet_csk(sk);
 +      struct tcp_sock *tp = tcp_sk(sk);
 +      bool recovered = !before(tp->snd_una, tp->high_seq);
 +
 +      if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
 +              if (flag & FLAG_ORIG_SACK_ACKED) {
 +                      /* Step 3.b. A timeout is spurious if not all data are
 +                       * lost, i.e., never-retransmitted data are (s)acked.
 +                       */
 +                      tcp_try_undo_loss(sk, true);
 +                      return;
 +              }
 +              if (after(tp->snd_nxt, tp->high_seq) &&
 +                  (flag & FLAG_DATA_SACKED || is_dupack)) {
 +                      tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
 +              } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
 +                      tp->high_seq = tp->snd_nxt;
 +                      __tcp_push_pending_frames(sk, tcp_current_mss(sk),
 +                                                TCP_NAGLE_OFF);
 +                      if (after(tp->snd_nxt, tp->high_seq))
 +                              return; /* Step 2.b */
 +                      tp->frto = 0;
 +              }
 +      }
 +
 +      if (recovered) {
 +              /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
 +              icsk->icsk_retransmits = 0;
 +              tcp_try_undo_recovery(sk);
 +              return;
 +      }
 +      if (flag & FLAG_DATA_ACKED)
 +              icsk->icsk_retransmits = 0;
 +      if (tcp_is_reno(tp)) {
 +              /* A Reno DUPACK means new data in F-RTO step 2.b above are
 +               * delivered. Lower inflight to clock out (re)transmissions.
 +               */
 +              if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
 +                      tcp_add_reno_sack(sk);
 +              else if (flag & FLAG_SND_UNA_ADVANCED)
 +                      tcp_reset_reno_sack(tp);
 +      }
 +      if (tcp_try_undo_loss(sk, false))
 +              return;
 +      tcp_xmit_retransmit_queue(sk);
 +}
 +
  /* Process an event, which can update packets-in-flight not trivially.
   * Main goal of this function is to calculate new estimate for left_out,
   * taking into account both packets sitting in receiver's buffer and
@@@ -2779,6 -2918,12 +2776,6 @@@ static void tcp_fastretrans_alert(struc
                tp->retrans_stamp = 0;
        } else if (!before(tp->snd_una, tp->high_seq)) {
                switch (icsk->icsk_ca_state) {
 -              case TCP_CA_Loss:
 -                      icsk->icsk_retransmits = 0;
 -                      if (tcp_try_undo_recovery(sk))
 -                              return;
 -                      break;
 -
                case TCP_CA_CWR:
                        /* CWR is to be held something *above* high_seq
                         * is ACKed for CWR bit to reach receiver. */
                newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
                break;
        case TCP_CA_Loss:
 -              if (flag & FLAG_DATA_ACKED)
 -                      icsk->icsk_retransmits = 0;
 -              if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
 -                      tcp_reset_reno_sack(tp);
 -              if (!tcp_try_undo_loss(sk)) {
 -                      tcp_moderate_cwnd(tp);
 -                      tcp_xmit_retransmit_queue(sk);
 -                      return;
 -              }
 +              tcp_process_loss(sk, flag, is_dupack);
                if (icsk->icsk_ca_state != TCP_CA_Open)
                        return;
 -              /* Loss is undone; fall through to processing in Open state. */
 +              /* Fall through to processing in Open state. */
        default:
                if (tcp_is_reno(tp)) {
                        if (flag & FLAG_SND_UNA_ADVANCED)
@@@ -2925,7 -3078,6 +2922,7 @@@ static void tcp_cong_avoid(struct sock 
   */
  void tcp_rearm_rto(struct sock *sk)
  {
 +      const struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
  
        /* If the retrans timer is currently being used by Fast Open
        } else {
                u32 rto = inet_csk(sk)->icsk_rto;
                /* Offset the time elapsed after installing regular RTO */
 -              if (tp->early_retrans_delayed) {
 +              if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 +                  icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
                        struct sk_buff *skb = tcp_write_queue_head(sk);
                        const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto;
                        s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
                        /* delta may not be positive if the socket is locked
 -                       * when the delayed ER timer fires and is rescheduled.
 +                       * when the retrans timer fires and is rescheduled.
                         */
                        if (delta > 0)
                                rto = delta;
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
                                          TCP_RTO_MAX);
        }
 -      tp->early_retrans_delayed = 0;
  }
  
  /* This function is called when the delayed ER timer fires. TCP enters
@@@ -3040,6 -3192,8 +3037,6 @@@ static int tcp_clean_rtx_queue(struct s
                        flag |= FLAG_RETRANS_DATA_ACKED;
                        ca_seq_rtt = -1;
                        seq_rtt = -1;
 -                      if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
 -                              flag |= FLAG_NONHEAD_RETRANS_ACKED;
                } else {
                        ca_seq_rtt = now - scb->when;
                        last_ackt = skb->tstamp;
                        }
                        if (!(sacked & TCPCB_SACKED_ACKED))
                                reord = min(pkts_acked, reord);
 +                      if (!after(scb->end_seq, tp->high_seq))
 +                              flag |= FLAG_ORIG_SACK_ACKED;
                }
  
                if (sacked & TCPCB_SACKED_ACKED)
@@@ -3250,6 -3402,150 +3247,6 @@@ static int tcp_ack_update_window(struc
        return flag;
  }
  
 -/* A very conservative spurious RTO response algorithm: reduce cwnd and
 - * continue in congestion avoidance.
 - */
 -static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 -{
 -      tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
 -      tp->snd_cwnd_cnt = 0;
 -      TCP_ECN_queue_cwr(tp);
 -      tcp_moderate_cwnd(tp);
 -}
 -
 -/* A conservative spurious RTO response algorithm: reduce cwnd using
 - * PRR and continue in congestion avoidance.
 - */
 -static void tcp_cwr_spur_to_response(struct sock *sk)
 -{
 -      tcp_enter_cwr(sk, 0);
 -}
 -
 -static void tcp_undo_spur_to_response(struct sock *sk, int flag)
 -{
 -      if (flag & FLAG_ECE)
 -              tcp_cwr_spur_to_response(sk);
 -      else
 -              tcp_undo_cwr(sk, true);
 -}
 -
 -/* F-RTO spurious RTO detection algorithm (RFC4138)
 - *
 - * F-RTO takes effect during the two new ACKs following an RTO (well, almost, see
 - * inline comments). State (ACK number) is kept in frto_counter. When ACK advances
 - * window (but not to or beyond highest sequence sent before RTO):
 - *   On First ACK,  send two new segments out.
 - *   On Second ACK, RTO was likely spurious. Do spurious response (response
 - *                  algorithm is not part of the F-RTO detection algorithm
 - *                  given in RFC4138 but can be selected separately).
 - * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
 - * and TCP falls back to conventional RTO recovery. F-RTO allows overriding of
 - * Nagle; this is done using frto_counter states 2 and 3: when a new data
 - * segment of any size is sent during F-RTO, state 2 is upgraded to 3.
 - *
 - * Rationale: if the RTO was spurious, new ACKs should arrive from the
 - * original window even after we transmit two new data segments.
 - *
 - * SACK version:
 - *   on the first step, wait until the first cumulative ACK arrives, then move
 - *   to the second step. In the second step, the next ACK decides.
 - *
 - * F-RTO is implemented (mainly) in four functions:
 - *   - tcp_use_frto() is used to determine if TCP can use F-RTO
 - *   - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used; it is
 - *     called when tcp_use_frto() showed green light
 - *   - tcp_process_frto() handles incoming ACKs during the F-RTO algorithm
 - *   - tcp_enter_frto_loss() is called if there is not enough evidence
 - *     to prove that the RTO is indeed spurious. It transfers control
 - *     from F-RTO to conventional RTO recovery
 - */
 -static bool tcp_process_frto(struct sock *sk, int flag)
 -{
 -      struct tcp_sock *tp = tcp_sk(sk);
 -
 -      tcp_verify_left_out(tp);
 -
 -      /* Duplicate the behavior from Loss state (fastretrans_alert) */
 -      if (flag & FLAG_DATA_ACKED)
 -              inet_csk(sk)->icsk_retransmits = 0;
 -
 -      if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
 -          ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
 -              tp->undo_marker = 0;
 -
 -      if (!before(tp->snd_una, tp->frto_highmark)) {
 -              tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
 -              return true;
 -      }
 -
 -      if (!tcp_is_sackfrto(tp)) {
 -              /* RFC4138 shortcoming in step 2; should also have case c):
 -               * ACK isn't duplicate nor advances window, e.g., opposite dir
 -               * data, winupdate
 -               */
 -              if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
 -                      return true;
 -
 -              if (!(flag & FLAG_DATA_ACKED)) {
 -                      tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
 -                                          flag);
 -                      return true;
 -              }
 -      } else {
 -              if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
 -                      if (!tcp_packets_in_flight(tp)) {
 -                              tcp_enter_frto_loss(sk, 2, flag);
 -                              return true;
 -                      }
 -
 -                      /* Prevent sending of new data. */
 -                      tp->snd_cwnd = min(tp->snd_cwnd,
 -                                         tcp_packets_in_flight(tp));
 -                      return true;
 -              }
 -
 -              if ((tp->frto_counter >= 2) &&
 -                  (!(flag & FLAG_FORWARD_PROGRESS) ||
 -                   ((flag & FLAG_DATA_SACKED) &&
 -                    !(flag & FLAG_ONLY_ORIG_SACKED)))) {
 -                      /* RFC4138 shortcoming (see comment above) */
 -                      if (!(flag & FLAG_FORWARD_PROGRESS) &&
 -                          (flag & FLAG_NOT_DUP))
 -                              return true;
 -
 -                      tcp_enter_frto_loss(sk, 3, flag);
 -                      return true;
 -              }
 -      }
 -
 -      if (tp->frto_counter == 1) {
 -              /* tcp_may_send_now needs to see updated state */
 -              tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
 -              tp->frto_counter = 2;
 -
 -              if (!tcp_may_send_now(sk))
 -                      tcp_enter_frto_loss(sk, 2, flag);
 -
 -              return true;
 -      } else {
 -              switch (sysctl_tcp_frto_response) {
 -              case 2:
 -                      tcp_undo_spur_to_response(sk, flag);
 -                      break;
 -              case 1:
 -                      tcp_conservative_spur_to_response(tp);
 -                      break;
 -              default:
 -                      tcp_cwr_spur_to_response(sk);
 -                      break;
 -              }
 -              tp->frto_counter = 0;
 -              tp->undo_marker = 0;
 -              NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
 -      }
 -      return false;
 -}
 -
  /* RFC 5961 7 [ACK Throttling] */
  static void tcp_send_challenge_ack(struct sock *sk)
  {
        }
  }
  
 +/* This routine deals with acks during a TLP episode.
 + * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
 + */
 +static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
 +{
 +      struct tcp_sock *tp = tcp_sk(sk);
 +      bool is_tlp_dupack = (ack == tp->tlp_high_seq) &&
 +                           !(flag & (FLAG_SND_UNA_ADVANCED |
 +                                     FLAG_NOT_DUP | FLAG_DATA_SACKED));
 +
 +      /* Mark the end of TLP episode on receiving TLP dupack or when
 +       * ack is after tlp_high_seq.
 +       */
 +      if (is_tlp_dupack) {
 +              tp->tlp_high_seq = 0;
 +              return;
 +      }
 +
 +      if (after(ack, tp->tlp_high_seq)) {
 +              tp->tlp_high_seq = 0;
 +              /* Don't reduce cwnd if DSACK arrives for TLP retrans. */
 +              if (!(flag & FLAG_DSACKING_ACK)) {
 +                      tcp_init_cwnd_reduction(sk, true);
 +                      tcp_set_ca_state(sk, TCP_CA_CWR);
 +                      tcp_end_cwnd_reduction(sk);
 +                      tcp_set_ca_state(sk, TCP_CA_Open);
 +                      NET_INC_STATS_BH(sock_net(sk),
 +                                       LINUX_MIB_TCPLOSSPROBERECOVERY);
 +              }
 +      }
 +}
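
Read plainly, the dupack test above requires the ACK to land exactly on tlp_high_seq while carrying no new information of any kind: snd_una did not advance, no data or window update arrived, and nothing new was SACKed. A standalone sketch of that predicate, using made-up flag values in place of the kernel's FLAG_* bits:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative stand-ins for the kernel's FLAG_* bits (values are arbitrary). */
#define FLAG_SND_UNA_ADVANCED  0x01
#define FLAG_NOT_DUP           0x02   /* carried data or moved the window */
#define FLAG_DATA_SACKED       0x04

/* A pure TLP dupack acknowledges exactly the probe sequence and brings no
 * progress of any kind, so the probe merely confirmed delivery of data that
 * was already outstanding and the episode can end without a cwnd reduction. */
static bool is_tlp_dupack(uint32_t ack, uint32_t tlp_high_seq, int flag)
{
        return ack == tlp_high_seq &&
               !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP | FLAG_DATA_SACKED));
}

int main(void)
{
        /* ACK for the probe with no new data or SACK info: episode ends quietly. */
        return is_tlp_dupack(12345, 12345, 0) ? 0 : 1;
}
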
 +
  /* This routine deals with incoming acks, but not outgoing ones. */
  static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
  {
        int prior_packets;
        int prior_sacked = tp->sacked_out;
        int pkts_acked = 0;
 -      bool frto_cwnd = false;
  
        /* If the ack is older than previous acks
         * then we can probably ignore it.
        if (after(ack, tp->snd_nxt))
                goto invalid_ack;
  
 -      if (tp->early_retrans_delayed)
 +      if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 +          icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
                tcp_rearm_rto(sk);
  
        if (after(ack, prior_snd_una))
  
        pkts_acked = prior_packets - tp->packets_out;
  
 -      if (tp->frto_counter)
 -              frto_cwnd = tcp_process_frto(sk, flag);
 -      /* Guarantee sacktag reordering detection against wrap-arounds */
 -      if (before(tp->frto_highmark, tp->snd_una))
 -              tp->frto_highmark = 0;
 -
        if (tcp_ack_is_dubious(sk, flag)) {
                /* Advance CWND, if state allows this. */
 -              if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
 -                  tcp_may_raise_cwnd(sk, flag))
 +              if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
                        tcp_cong_avoid(sk, ack, prior_in_flight);
                is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
                tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
                                      is_dupack, flag);
        } else {
 -              if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
 +              if (flag & FLAG_DATA_ACKED)
                        tcp_cong_avoid(sk, ack, prior_in_flight);
        }
  
 +      if (tp->tlp_high_seq)
 +              tcp_process_tlp_ack(sk, ack, flag);
 +
        if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
                struct dst_entry *dst = __sk_dst_get(sk);
                if (dst)
                        dst_confirm(dst);
        }
 +
 +      if (icsk->icsk_pending == ICSK_TIME_RETRANS)
 +              tcp_schedule_loss_probe(sk);
        return 1;
  
  no_queue:
         */
        if (tcp_send_head(sk))
                tcp_ack_probe(sk);
 +
 +      if (tp->tlp_high_seq)
 +              tcp_process_tlp_ack(sk, ack, flag);
        return 1;
  
  invalid_ack:
@@@ -3450,8 -3712,8 +3447,8 @@@ old_ack
   * But, this can also be called on packets in the established flow when
   * the fast version below fails.
   */
 -void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
 -                     const u8 **hvpp, int estab,
 +void tcp_parse_options(const struct sk_buff *skb,
 +                     struct tcp_options_received *opt_rx, int estab,
                       struct tcp_fastopen_cookie *foc)
  {
        const unsigned char *ptr;
                                 */
                                break;
  #endif
 -                      case TCPOPT_COOKIE:
 -                              /* This option is variable length.
 -                               */
 -                              switch (opsize) {
 -                              case TCPOLEN_COOKIE_BASE:
 -                                      /* not yet implemented */
 -                                      break;
 -                              case TCPOLEN_COOKIE_PAIR:
 -                                      /* not yet implemented */
 -                                      break;
 -                              case TCPOLEN_COOKIE_MIN+0:
 -                              case TCPOLEN_COOKIE_MIN+2:
 -                              case TCPOLEN_COOKIE_MIN+4:
 -                              case TCPOLEN_COOKIE_MIN+6:
 -                              case TCPOLEN_COOKIE_MAX:
 -                                      /* 16-bit multiple */
 -                                      opt_rx->cookie_plus = opsize;
 -                                      *hvpp = ptr;
 -                                      break;
 -                              default:
 -                                      /* ignore option */
 -                                      break;
 -                              }
 -                              break;
 -
                        case TCPOPT_EXP:
                                /* Fast Open option shares code 254 using a
                                 * 16 bits magic number. It's valid only in
@@@ -3580,7 -3867,8 +3577,7 @@@ static bool tcp_parse_aligned_timestamp
   * If it is wrong it falls back on tcp_parse_options().
   */
  static bool tcp_fast_parse_options(const struct sk_buff *skb,
 -                                 const struct tcphdr *th,
 -                                 struct tcp_sock *tp, const u8 **hvpp)
 +                                 const struct tcphdr *th, struct tcp_sock *tp)
  {
        /* In the spirit of fast parsing, compare doff directly to constant
         * values.  Because equality is used, short doff can be ignored here.
                        return true;
        }
  
 -      tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
 +      tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
        if (tp->rx_opt.saw_tstamp)
                tp->rx_opt.rcv_tsecr -= tp->tsoffset;
  
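
The "compare doff directly to constant values" shortcut above relies on the fact that a TCP header carrying only the aligned timestamp option has exactly one possible length, so a single equality test distinguishes it from every other option layout. A small arithmetic sketch, with constants assumed to mirror the kernel's sizes (illustration only):

#include <stdio.h>

/* Assumed sizes: 20-byte base TCP header plus NOP,NOP and the 10-byte
 * timestamp option, padded to 12 bytes, for 32 bytes in total. */
#define TCP_BASE_HEADER_WORDS   5   /* 20 bytes / 4 */
#define TCPOLEN_TSTAMP_ALIGNED 12   /* padded timestamp option, in bytes */

int main(void)
{
        unsigned int doff_ts_only = TCP_BASE_HEADER_WORDS + TCPOLEN_TSTAMP_ALIGNED / 4;

        /* A segment whose doff equals exactly this value can only be carrying
         * the timestamp option, so the parser may skip the generic option walk. */
        printf("timestamp-only header: doff=%u words (%u bytes)\n",
               doff_ts_only, doff_ts_only * 4);
        return 0;
}
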
  static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
                                  const struct tcphdr *th, int syn_inerr)
  {
        struct tcp_sock *tp = tcp_sk(sk);
  
        /* RFC1323: H1. Apply PAWS check first. */
 -      if (tcp_fast_parse_options(skb, th, tp, &hash_location) &&
 -          tp->rx_opt.saw_tstamp &&
 +      if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
            tcp_paws_discard(sk, skb)) {
                if (!th->rst) {
                        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
@@@ -5332,11 -5622,12 +5329,11 @@@ static bool tcp_rcv_fastopen_synack(str
  
        if (mss == tp->rx_opt.user_mss) {
                struct tcp_options_received opt;
 -              const u8 *hash_location;
  
                /* Get original SYNACK MSS value if user MSS sets mss_clamp */
                tcp_clear_options(&opt);
                opt.user_mss = opt.mss_clamp = 0;
 -              tcp_parse_options(synack, &opt, &hash_location, 0, NULL);
 +              tcp_parse_options(synack, &opt, 0, NULL);
                mss = opt.mss_clamp;
        }
  
  static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                                         const struct tcphdr *th, unsigned int len)
  {
 -      const u8 *hash_location;
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_fastopen_cookie foc = { .len = -1 };
        int saved_clamp = tp->rx_opt.mss_clamp;
  
 -      tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc);
 +      tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
        if (tp->rx_opt.saw_tstamp)
                tp->rx_opt.rcv_tsecr -= tp->tsoffset;
  
                 * is initialized. */
                tp->copied_seq = tp->rcv_nxt;
  
 -              if (cvp != NULL &&
 -                  cvp->cookie_pair_size > 0 &&
 -                  tp->rx_opt.cookie_plus > 0) {
 -                      int cookie_size = tp->rx_opt.cookie_plus
 -                                      - TCPOLEN_COOKIE_BASE;
 -                      int cookie_pair_size = cookie_size
 -                                           + cvp->cookie_desired;
 -
 -                      /* A cookie extension option was sent and returned.
 -                       * Note that each incoming SYNACK replaces the
 -                       * Responder cookie.  The initial exchange is most
 -                       * fragile, as protection against spoofing relies
 -                       * entirely upon the sequence and timestamp (above).
 -                       * This replacement strategy allows the correct pair to
 -                       * pass through, while any others will be filtered via
 -                       * Responder verification later.
 -                       */
 -                      if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
 -                              memcpy(&cvp->cookie_pair[cvp->cookie_desired],
 -                                     hash_location, cookie_size);
 -                              cvp->cookie_pair_size = cookie_pair_size;
 -                      }
 -              }
 -
                smp_mb();
  
                tcp_finish_connect(sk, skb);
diff --combined net/ipv6/addrconf.c
@@@ -70,7 -70,6 +70,7 @@@
  #include <net/snmp.h>
  
  #include <net/af_ieee802154.h>
 +#include <net/firewire.h>
  #include <net/ipv6.h>
  #include <net/protocol.h>
  #include <net/ndisc.h>
@@@ -545,7 -544,8 +545,7 @@@ static const struct nla_policy devconf_
  };
  
  static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
 -                                   struct nlmsghdr *nlh,
 -                                   void *arg)
 +                                   struct nlmsghdr *nlh)
  {
        struct net *net = sock_net(in_skb->sk);
        struct nlattr *tb[NETCONFA_MAX+1];
@@@ -605,77 -605,6 +605,77 @@@ errout
        return err;
  }
  
 +static int inet6_netconf_dump_devconf(struct sk_buff *skb,
 +                                    struct netlink_callback *cb)
 +{
 +      struct net *net = sock_net(skb->sk);
 +      int h, s_h;
 +      int idx, s_idx;
 +      struct net_device *dev;
 +      struct inet6_dev *idev;
 +      struct hlist_head *head;
 +
 +      s_h = cb->args[0];
 +      s_idx = idx = cb->args[1];
 +
 +      for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 +              idx = 0;
 +              head = &net->dev_index_head[h];
 +              rcu_read_lock();
 +              cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^
 +                        net->dev_base_seq;
 +              hlist_for_each_entry_rcu(dev, head, index_hlist) {
 +                      if (idx < s_idx)
 +                              goto cont;
 +                      idev = __in6_dev_get(dev);
 +                      if (!idev)
 +                              goto cont;
 +
 +                      if (inet6_netconf_fill_devconf(skb, dev->ifindex,
 +                                                     &idev->cnf,
 +                                                     NETLINK_CB(cb->skb).portid,
 +                                                     cb->nlh->nlmsg_seq,
 +                                                     RTM_NEWNETCONF,
 +                                                     NLM_F_MULTI,
 +                                                     -1) <= 0) {
 +                              rcu_read_unlock();
 +                              goto done;
 +                      }
 +                      nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 +cont:
 +                      idx++;
 +              }
 +              rcu_read_unlock();
 +      }
 +      if (h == NETDEV_HASHENTRIES) {
 +              if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
 +                                             net->ipv6.devconf_all,
 +                                             NETLINK_CB(cb->skb).portid,
 +                                             cb->nlh->nlmsg_seq,
 +                                             RTM_NEWNETCONF, NLM_F_MULTI,
 +                                             -1) <= 0)
 +                      goto done;
 +              else
 +                      h++;
 +      }
 +      if (h == NETDEV_HASHENTRIES + 1) {
 +              if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
 +                                             net->ipv6.devconf_dflt,
 +                                             NETLINK_CB(cb->skb).portid,
 +                                             cb->nlh->nlmsg_seq,
 +                                             RTM_NEWNETCONF, NLM_F_MULTI,
 +                                             -1) <= 0)
 +                      goto done;
 +              else
 +                      h++;
 +      }
 +done:
 +      cb->args[0] = h;
 +      cb->args[1] = idx;
 +
 +      return skb->len;
 +}
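
inet6_netconf_dump_devconf() above follows the standard resumable netlink dump pattern: the hash-bucket and in-bucket cursors live in cb->args so a dump that fills the skb can continue on the next callback (the synthetic "all" and "default" entries are emitted after the hash walk), and cb->seq is derived from the address generation id so nl_dump_check_consistent() can flag changes that happen mid-dump. A minimal sketch of just the cursor handling, with a hypothetical emit() callback standing in for inet6_netconf_fill_devconf():

#include <stdbool.h>
#include <stddef.h>

#define NBUCKETS   16
#define PER_BUCKET  8

/* Hypothetical "write one entry; return false when the buffer is full". */
typedef bool (*emit_fn)(int bucket, int idx, void *ctx);

/* Resume a two-level walk from a saved cursor (args[0]=bucket, args[1]=index),
 * updating the cursor so the next call retries the entry that did not fit. */
static void dump_resumable(long args[2], emit_fn emit, void *ctx)
{
        int h, idx;
        int s_idx = (int)args[1];

        for (h = (int)args[0]; h < NBUCKETS; h++, s_idx = 0) {
                for (idx = 0; idx < PER_BUCKET; idx++) {
                        if (idx < s_idx)
                                continue;       /* already dumped last time */
                        if (!emit(h, idx, ctx))
                                goto done;      /* buffer full: remember position */
                }
        }
        idx = 0;                                /* walk complete */
done:
        args[0] = h;
        args[1] = idx;
}

static bool emit_all(int bucket, int idx, void *ctx)
{
        (void)bucket; (void)idx; (void)ctx;
        return true;    /* pretend the buffer never fills */
}

int main(void)
{
        long cursor[2] = {0, 0};

        dump_resumable(cursor, emit_all, NULL);
        return cursor[0] == NBUCKETS ? 0 : 1;
}
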
 +
  #ifdef CONFIG_SYSCTL
  static void dev_forward_change(struct inet6_dev *idev)
  {
@@@ -1739,20 -1668,6 +1739,20 @@@ static int addrconf_ifid_eui64(u8 *eui
        return 0;
  }
  
 +static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev)
 +{
 +      union fwnet_hwaddr *ha;
 +
 +      if (dev->addr_len != FWNET_ALEN)
 +              return -1;
 +
 +      ha = (union fwnet_hwaddr *)dev->dev_addr;
 +
 +      memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id));
 +      eui[0] ^= 2;
 +      return 0;
 +}
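
The ieee1394 helper above derives the IPv6 interface identifier straight from the FireWire EUI-64: copy the eight bytes of the unique id and invert the universal/local bit of the first octet (unlike 48-bit MACs, no ff:fe insertion is needed for native EUI-64 material). A userspace sketch of the same transformation, using a made-up unique id for illustration:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Turn a native EUI-64 (e.g. a FireWire node unique id) into an IPv6
 * interface identifier: copy the 8 bytes and invert the universal/local
 * bit, per RFC 4291. The sample id below is fabricated for illustration. */
static void eui64_to_ifid(uint8_t ifid[8], const uint8_t eui64[8])
{
        memcpy(ifid, eui64, 8);
        ifid[0] ^= 0x02;        /* invert the u/l bit */
}

int main(void)
{
        const uint8_t uniq_id[8] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77 };
        uint8_t ifid[8];
        int i;

        eui64_to_ifid(ifid, uniq_id);
        for (i = 0; i < 8; i++)
                printf("%02x%s", ifid[i], i == 7 ? "\n" : ":");
        return 0;
}
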
 +
  static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev)
  {
        /* XXX: inherit EUI-64 from other interface -- yoshfuji */
@@@ -1817,8 -1732,6 +1817,8 @@@ static int ipv6_generate_eui64(u8 *eui
                return addrconf_ifid_gre(eui, dev);
        case ARPHRD_IEEE802154:
                return addrconf_ifid_eui64(eui, dev);
 +      case ARPHRD_IEEE1394:
 +              return addrconf_ifid_ieee1394(eui, dev);
        }
        return -1;
  }
@@@ -2660,8 -2573,7 +2660,8 @@@ static void addrconf_dev_config(struct 
            (dev->type != ARPHRD_FDDI) &&
            (dev->type != ARPHRD_ARCNET) &&
            (dev->type != ARPHRD_INFINIBAND) &&
 -          (dev->type != ARPHRD_IEEE802154)) {
 +          (dev->type != ARPHRD_IEEE802154) &&
 +          (dev->type != ARPHRD_IEEE1394)) {
                /* Alas, we support only Ethernet autoconfiguration. */
                return;
        }
@@@ -3598,7 -3510,7 +3598,7 @@@ static const struct nla_policy ifa_ipv6
  };
  
  static int
 -inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 +inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
  {
        struct net *net = sock_net(skb->sk);
        struct ifaddrmsg *ifm;
@@@ -3664,7 -3576,7 +3664,7 @@@ static int inet6_addr_modify(struct ine
  }
  
  static int
 -inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 +inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
  {
        struct net *net = sock_net(skb->sk);
        struct ifaddrmsg *ifm;
@@@ -3895,7 -3807,6 +3895,7 @@@ static int in6_dump_addrs(struct inet6_
                                                NLM_F_MULTI);
                        if (err <= 0)
                                break;
 +                      nl_dump_check_consistent(cb, nlmsg_hdr(skb));
                }
                break;
        }
@@@ -3953,7 -3864,6 +3953,7 @@@ static int inet6_dump_addr(struct sk_bu
        s_ip_idx = ip_idx = cb->args[2];
  
        rcu_read_lock();
 +      cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq;
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
@@@ -4005,7 -3915,8 +4005,7 @@@ static int inet6_dump_ifacaddr(struct s
        return inet6_dump_addr(skb, cb, type);
  }
  
 -static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 -                           void *arg)
 +static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
  {
        struct net *net = sock_net(in_skb->sk);
        struct ifaddrmsg *ifm;
@@@ -4430,8 -4341,6 +4430,8 @@@ errout
  
  static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
  {
 +      struct net *net = dev_net(ifp->idev->dev);
 +
        inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
  
        switch (event) {
                        dst_free(&ifp->rt->dst);
                break;
        }
 +      atomic_inc(&net->ipv6.dev_addr_genid);
  }
  
  static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@@ -4876,26 -4784,20 +4876,20 @@@ static void addrconf_sysctl_unregister(
  
  static int __net_init addrconf_init_net(struct net *net)
  {
-       int err;
+       int err = -ENOMEM;
        struct ipv6_devconf *all, *dflt;
  
-       err = -ENOMEM;
-       all = &ipv6_devconf;
-       dflt = &ipv6_devconf_dflt;
+       all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL);
+       if (all == NULL)
+               goto err_alloc_all;
  
-       if (!net_eq(net, &init_net)) {
-               all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL);
-               if (all == NULL)
-                       goto err_alloc_all;
+       dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL);
+       if (dflt == NULL)
+               goto err_alloc_dflt;
  
-               dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL);
-               if (dflt == NULL)
-                       goto err_alloc_dflt;
-       } else {
-               /* these will be inherited by all namespaces */
-               dflt->autoconf = ipv6_defaults.autoconf;
-               dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
-       }
+       /* these will be inherited by all namespaces */
+       dflt->autoconf = ipv6_defaults.autoconf;
+       dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
  
        net->ipv6.devconf_all = all;
        net->ipv6.devconf_dflt = dflt;
@@@ -5032,7 -4934,7 +5026,7 @@@ int __init addrconf_init(void
        __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
                        inet6_dump_ifacaddr, NULL);
        __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
 -                      NULL, NULL);
 +                      inet6_netconf_dump_devconf, NULL);
  
        ipv6_addr_label_rtnl_register();