Re: Linux 4.9.134

From: Greg KH
Date: Thu Oct 18 2018 - 05:04:17 EST


diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt
index 1506e948610c..d1f435c92912 100644
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -10,6 +10,7 @@ Required properties:
Use "cdns,pc302-gem" for Picochip picoXcell pc302 and later devices based on
the Cadence GEM, or the generic form: "cdns,gem".
Use "atmel,sama5d2-gem" for the GEM IP (10/100) available on Atmel sama5d2 SoCs.
+ Use "atmel,sama5d3-macb" for the 10/100Mbit IP available on Atmel sama5d3 SoCs.
Use "atmel,sama5d3-gem" for the Gigabit IP available on Atmel sama5d3 SoCs.
Use "atmel,sama5d4-gem" for the GEM IP (10/100) available on Atmel sama5d4 SoCs.
Use "cdns,zynq-gem" Xilinx Zynq-7xxx SoC.
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 3db8c67d2c8d..dbdc4130e149 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -122,14 +122,11 @@ min_adv_mss - INTEGER

IP Fragmentation:

-ipfrag_high_thresh - INTEGER
- Maximum memory used to reassemble IP fragments. When
- ipfrag_high_thresh bytes of memory is allocated for this purpose,
- the fragment handler will toss packets until ipfrag_low_thresh
- is reached. This also serves as a maximum limit to namespaces
- different from the initial one.
-
-ipfrag_low_thresh - INTEGER
+ipfrag_high_thresh - LONG INTEGER
+ Maximum memory used to reassemble IP fragments.
+
+ipfrag_low_thresh - LONG INTEGER
+ (Obsolete since linux-4.17)
Maximum memory used to reassemble IP fragments before the kernel
begins to remove incomplete fragment queues to free up resources.
The kernel still accepts new fragments for defragmentation.
diff --git a/Makefile b/Makefile
index 18090f899a7c..46135e4333e6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 9
-SUBLEVEL = 133
+SUBLEVEL = 134
EXTRAVERSION =
NAME = Roaring Lionus

diff --git a/arch/arm/boot/dts/sama5d3_emac.dtsi b/arch/arm/boot/dts/sama5d3_emac.dtsi
index 7cb235ef0fb6..6e9e1c2f9def 100644
--- a/arch/arm/boot/dts/sama5d3_emac.dtsi
+++ b/arch/arm/boot/dts/sama5d3_emac.dtsi
@@ -41,7 +41,7 @@
};

macb1: ethernet@f802c000 {
- compatible = "cdns,at91sam9260-macb", "cdns,macb";
+ compatible = "atmel,sama5d3-macb", "cdns,at91sam9260-macb", "cdns,macb";
reg = <0xf802c000 0x100>;
interrupts = <35 IRQ_TYPE_LEVEL_HIGH 3>;
pinctrl-names = "default";
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 739c0c594022..1bb90fafcdc3 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -356,5 +356,6 @@ struct kvm_sync_regs {

#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)

#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a8a86be8cf15..69a81a7daa24 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1220,9 +1220,8 @@ EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);

static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
- return kvm_apic_hw_enabled(apic) &&
- addr >= apic->base_address &&
- addr < apic->base_address + LAPIC_MMIO_LENGTH;
+ return addr >= apic->base_address &&
+ addr < apic->base_address + LAPIC_MMIO_LENGTH;
}

static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
@@ -1234,6 +1233,15 @@ static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!apic_mmio_in_range(apic, address))
return -EOPNOTSUPP;

+ if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
+ if (!kvm_check_has_quirk(vcpu->kvm,
+ KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
+ return -EOPNOTSUPP;
+
+ memset(data, 0xff, len);
+ return 0;
+ }
+
kvm_lapic_reg_read(apic, offset, len, data);

return 0;
@@ -1646,6 +1654,14 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!apic_mmio_in_range(apic, address))
return -EOPNOTSUPP;

+ if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
+ if (!kvm_check_has_quirk(vcpu->kvm,
+ KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
+ return -EOPNOTSUPP;
+
+ return 0;
+ }
+
/*
* APIC register must be aligned on 128-bits boundary.
* 32/64/128 bits registers must be accessed thru 32 bits.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 47951f4775b9..d47c32a18da8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -505,7 +505,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,

while (true) {
temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
- if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
if (timeout <= 0)
return -ETIME;
diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c
index 7aa7b9cb6203..efefcfa24a4c 100644
--- a/drivers/i2c/busses/i2c-scmi.c
+++ b/drivers/i2c/busses/i2c-scmi.c
@@ -152,6 +152,7 @@ acpi_smbus_cmi_access(struct i2c_adapter *adap, u16 addr, unsigned short flags,
mt_params[3].type = ACPI_TYPE_INTEGER;
mt_params[3].integer.value = len;
mt_params[4].type = ACPI_TYPE_BUFFER;
+ mt_params[4].buffer.length = len;
mt_params[4].buffer.pointer = data->block + 1;
}
break;
diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index 7aab376ecb84..3785c638d530 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -548,8 +548,8 @@ static int usbhs_omap_get_dt_pdata(struct device *dev,
}

static const struct of_device_id usbhs_child_match_table[] = {
- { .compatible = "ti,omap-ehci", },
- { .compatible = "ti,omap-ohci", },
+ { .compatible = "ti,ehci-omap", },
+ { .compatible = "ti,ohci-omap3", },
{ }
};

@@ -875,6 +875,7 @@ static struct platform_driver usbhs_omap_driver = {
.pm = &usbhsomap_dev_pm_ops,
.of_match_table = usbhs_omap_dt_ids,
},
+ .probe = usbhs_omap_probe,
.remove = usbhs_omap_remove,
};

@@ -884,9 +885,9 @@ MODULE_ALIAS("platform:" USBHS_DRIVER_NAME);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("usb host common core driver for omap EHCI and OHCI");

-static int __init omap_usbhs_drvinit(void)
+static int omap_usbhs_drvinit(void)
{
- return platform_driver_probe(&usbhs_omap_driver, usbhs_omap_probe);
+ return platform_driver_register(&usbhs_omap_driver);
}

/*
@@ -898,7 +899,7 @@ static int __init omap_usbhs_drvinit(void)
*/
fs_initcall_sync(omap_usbhs_drvinit);

-static void __exit omap_usbhs_drvexit(void)
+static void omap_usbhs_drvexit(void)
{
platform_driver_unregister(&usbhs_omap_driver);
}
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8a5e0ae4e4c0..b1ea29d8ad1a 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -216,6 +216,7 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
static void bond_slave_arr_handler(struct work_struct *work);
static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
int mod);
+static void bond_netdev_notify_work(struct work_struct *work);

/*---------------------------- General routines -----------------------------*/

@@ -1250,6 +1251,8 @@ static struct slave *bond_alloc_slave(struct bonding *bond)
return NULL;
}
}
+ INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work);
+
return slave;
}

@@ -1257,6 +1260,7 @@ static void bond_free_slave(struct slave *slave)
{
struct bonding *bond = bond_get_bond_by_slave(slave);

+ cancel_delayed_work_sync(&slave->notify_work);
if (BOND_MODE(bond) == BOND_MODE_8023AD)
kfree(SLAVE_AD_INFO(slave));

@@ -1278,39 +1282,26 @@ static void bond_fill_ifslave(struct slave *slave, struct ifslave *info)
info->link_failure_count = slave->link_failure_count;
}

-static void bond_netdev_notify(struct net_device *dev,
- struct netdev_bonding_info *info)
-{
- rtnl_lock();
- netdev_bonding_info_change(dev, info);
- rtnl_unlock();
-}
-
static void bond_netdev_notify_work(struct work_struct *_work)
{
- struct netdev_notify_work *w =
- container_of(_work, struct netdev_notify_work, work.work);
+ struct slave *slave = container_of(_work, struct slave,
+ notify_work.work);
+
+ if (rtnl_trylock()) {
+ struct netdev_bonding_info binfo;

- bond_netdev_notify(w->dev, &w->bonding_info);
- dev_put(w->dev);
- kfree(w);
+ bond_fill_ifslave(slave, &binfo.slave);
+ bond_fill_ifbond(slave->bond, &binfo.master);
+ netdev_bonding_info_change(slave->dev, &binfo);
+ rtnl_unlock();
+ } else {
+ queue_delayed_work(slave->bond->wq, &slave->notify_work, 1);
+ }
}

void bond_queue_slave_event(struct slave *slave)
{
- struct bonding *bond = slave->bond;
- struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC);
-
- if (!nnw)
- return;
-
- dev_hold(slave->dev);
- nnw->dev = slave->dev;
- bond_fill_ifslave(slave, &nnw->bonding_info.slave);
- bond_fill_ifbond(bond, &nnw->bonding_info.master);
- INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work);
-
- queue_delayed_work(slave->bond->wq, &nnw->work, 0);
+ queue_delayed_work(slave->bond->wq, &slave->notify_work, 0);
}

void bond_lower_state_changed(struct slave *slave)
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 2ce7ae97ac91..c2cd540e9c9e 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -744,7 +744,6 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds)
static int bcm_sf2_sw_resume(struct dsa_switch *ds)
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
- unsigned int port;
int ret;

ret = bcm_sf2_sw_rst(priv);
@@ -756,12 +755,7 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds)
if (priv->hw_params.num_gphy == 1)
bcm_sf2_gphy_enable_set(ds, true);

- for (port = 0; port < DSA_MAX_PORTS; port++) {
- if ((1 << port) & ds->enabled_port_mask)
- bcm_sf2_port_setup(ds, port, NULL);
- else if (dsa_is_cpu_port(ds, port))
- bcm_sf2_imp_setup(ds, port);
- }
+ ds->ops->setup(ds);

return 0;
}
@@ -1135,10 +1129,10 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev)
{
struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);

- /* Disable all ports and interrupts */
priv->wol_ports_mask = 0;
- bcm_sf2_sw_suspend(priv->dev->ds);
dsa_unregister_switch(priv->dev->ds);
+ /* Disable all ports and interrupts */
+ bcm_sf2_sw_suspend(priv->dev->ds);
bcm_sf2_mdio_unregister(priv);

return 0;
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 91fbba58d033..16dc9ac7ecb6 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -828,14 +828,22 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv)
{
u32 reg;

- /* Stop monitoring MPD interrupt */
- intrl2_0_mask_set(priv, INTRL2_0_MPD);
-
/* Clear the MagicPacket detection logic */
reg = umac_readl(priv, UMAC_MPD_CTRL);
reg &= ~MPD_EN;
umac_writel(priv, reg, UMAC_MPD_CTRL);

+ reg = intrl2_0_readl(priv, INTRL2_CPU_STATUS);
+ if (reg & INTRL2_0_MPD)
+ netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n");
+
+ if (reg & INTRL2_0_BRCM_MATCH_TAG) {
+ reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) &
+ RXCHK_BRCM_TAG_MATCH_MASK;
+ netdev_info(priv->netdev,
+ "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg);
+ }
+
netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n");
}

@@ -868,11 +876,6 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
if (priv->irq0_stat & INTRL2_0_TX_RING_FULL)
bcm_sysport_tx_reclaim_all(priv);

- if (priv->irq0_stat & INTRL2_0_MPD) {
- netdev_info(priv->netdev, "Wake-on-LAN interrupt!\n");
- bcm_sysport_resume_from_wol(priv);
- }
-
return IRQ_HANDLED;
}

@@ -1901,9 +1904,6 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv)
/* UniMAC receive needs to be turned on */
umac_enable_set(priv, CMD_RX_EN, 1);

- /* Enable the interrupt wake-up source */
- intrl2_0_mask_clear(priv, INTRL2_0_MPD);
-
netif_dbg(priv, wol, ndev, "entered WOL mode\n");

return 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 72297b76944f..208e9dacfd34 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1666,8 +1666,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) {
tx_pkts++;
/* return full budget so NAPI will complete. */
- if (unlikely(tx_pkts > bp->tx_wake_thresh))
+ if (unlikely(tx_pkts > bp->tx_wake_thresh)) {
rx_pkts = budget;
+ raw_cons = NEXT_RAW_CMP(raw_cons);
+ break;
+ }
} else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) {
rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &agg_event);
if (likely(rc >= 0))
@@ -1685,7 +1688,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
}
raw_cons = NEXT_RAW_CMP(raw_cons);

- if (rx_pkts == budget)
+ if (rx_pkts && rx_pkts == budget)
break;
}

@@ -1797,8 +1800,12 @@ static int bnxt_poll(struct napi_struct *napi, int budget)
while (1) {
work_done += bnxt_poll_work(bp, bnapi, budget - work_done);

- if (work_done >= budget)
+ if (work_done >= budget) {
+ if (!budget)
+ BNXT_CP_DB_REARM(cpr->cp_doorbell,
+ cpr->cp_raw_cons);
break;
+ }

if (!bnxt_has_work(bp, cpr)) {
napi_complete(napi);
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 2e1585635083..8f55c23e9821 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -2861,6 +2861,13 @@ static const struct macb_config at91sam9260_config = {
.init = macb_init,
};

+static const struct macb_config sama5d3macb_config = {
+ .caps = MACB_CAPS_SG_DISABLED
+ | MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII,
+ .clk_init = macb_clk_init,
+ .init = macb_init,
+};
+
static const struct macb_config pc302gem_config = {
.caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE,
.dma_burst_length = 16,
@@ -2925,6 +2932,7 @@ static const struct of_device_id macb_dt_ids[] = {
{ .compatible = "cdns,gem", .data = &pc302gem_config },
{ .compatible = "atmel,sama5d2-gem", .data = &sama5d2_config },
{ .compatible = "atmel,sama5d3-gem", .data = &sama5d3_config },
+ { .compatible = "atmel,sama5d3-macb", .data = &sama5d3macb_config },
{ .compatible = "atmel,sama5d4-gem", .data = &sama5d4_config },
{ .compatible = "cdns,at91rm9200-emac", .data = &emac_config },
{ .compatible = "cdns,emac", .data = &emac_config },
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c
index b6ed818f78ff..06bc8638501e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.c
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.c
@@ -80,7 +80,7 @@ static void hnae_unmap_buffer(struct hnae_ring *ring, struct hnae_desc_cb *cb)
if (cb->type == DESC_TYPE_SKB)
dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length,
ring_to_dma_dir(ring));
- else
+ else if (cb->length)
dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length,
ring_to_dma_dir(ring));
}
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 8a2a07e21324..92ed6534ceae 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -39,9 +39,9 @@
#define SKB_TMP_LEN(SKB) \
(((SKB)->transport_header - (SKB)->mac_header) + tcp_hdrlen(SKB))

-static void fill_v2_desc(struct hnae_ring *ring, void *priv,
- int size, dma_addr_t dma, int frag_end,
- int buf_num, enum hns_desc_type type, int mtu)
+static void fill_v2_desc_hw(struct hnae_ring *ring, void *priv, int size,
+ int send_sz, dma_addr_t dma, int frag_end,
+ int buf_num, enum hns_desc_type type, int mtu)
{
struct hnae_desc *desc = &ring->desc[ring->next_to_use];
struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
@@ -63,7 +63,7 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv,
desc_cb->type = type;

desc->addr = cpu_to_le64(dma);
- desc->tx.send_size = cpu_to_le16((u16)size);
+ desc->tx.send_size = cpu_to_le16((u16)send_sz);

/* config bd buffer end */
hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1);
@@ -132,6 +132,14 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv,
ring_ptr_move_fw(ring, next_to_use);
}

+static void fill_v2_desc(struct hnae_ring *ring, void *priv,
+ int size, dma_addr_t dma, int frag_end,
+ int buf_num, enum hns_desc_type type, int mtu)
+{
+ fill_v2_desc_hw(ring, priv, size, size, dma, frag_end,
+ buf_num, type, mtu);
+}
+
static const struct acpi_device_id hns_enet_acpi_match[] = {
{ "HISI00C1", 0 },
{ "HISI00C2", 0 },
@@ -288,15 +296,15 @@ static void fill_tso_desc(struct hnae_ring *ring, void *priv,

/* when the frag size is bigger than hardware, split this frag */
for (k = 0; k < frag_buf_num; k++)
- fill_v2_desc(ring, priv,
- (k == frag_buf_num - 1) ?
+ fill_v2_desc_hw(ring, priv, k == 0 ? size : 0,
+ (k == frag_buf_num - 1) ?
sizeoflast : BD_MAX_SEND_SIZE,
- dma + BD_MAX_SEND_SIZE * k,
- frag_end && (k == frag_buf_num - 1) ? 1 : 0,
- buf_num,
- (type == DESC_TYPE_SKB && !k) ?
+ dma + BD_MAX_SEND_SIZE * k,
+ frag_end && (k == frag_buf_num - 1) ? 1 : 0,
+ buf_num,
+ (type == DESC_TYPE_SKB && !k) ?
DESC_TYPE_SKB : DESC_TYPE_PAGE,
- mtu);
+ mtu);
}

netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev,
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 7e2ebfc565ee..ff62dc7485d5 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -29,6 +29,7 @@
#include <linux/clk.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>
+#include <linux/if_vlan.h>
#include <uapi/linux/ppp_defs.h>
#include <net/ip.h>
#include <net/ipv6.h>
@@ -4266,7 +4267,7 @@ static void mvpp2_txq_desc_put(struct mvpp2_tx_queue *txq)
}

/* Set Tx descriptors fields relevant for CSUM calculation */
-static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto,
+static u32 mvpp2_txq_desc_csum(int l3_offs, __be16 l3_proto,
int ip_hdr_len, int l4_proto)
{
u32 command;
@@ -5019,14 +5020,15 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb)
if (skb->ip_summed == CHECKSUM_PARTIAL) {
int ip_hdr_len = 0;
u8 l4_proto;
+ __be16 l3_proto = vlan_get_protocol(skb);

- if (skb->protocol == htons(ETH_P_IP)) {
+ if (l3_proto == htons(ETH_P_IP)) {
struct iphdr *ip4h = ip_hdr(skb);

/* Calculate IPv4 checksum and L4 checksum */
ip_hdr_len = ip4h->ihl;
l4_proto = ip4h->protocol;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ } else if (l3_proto == htons(ETH_P_IPV6)) {
struct ipv6hdr *ip6h = ipv6_hdr(skb);

/* Read l4_protocol from one of IPv6 extra headers */
@@ -5038,7 +5040,7 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb)
}

return mvpp2_txq_desc_csum(skb_network_offset(skb),
- skb->protocol, ip_hdr_len, l4_proto);
+ l3_proto, ip_hdr_len, l4_proto);
}

return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 49bad00a0f8f..5ddadcd0c8db 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -1800,7 +1800,8 @@ struct qlcnic_hardware_ops {
int (*config_loopback) (struct qlcnic_adapter *, u8);
int (*clear_loopback) (struct qlcnic_adapter *, u8);
int (*config_promisc_mode) (struct qlcnic_adapter *, u32);
- void (*change_l2_filter) (struct qlcnic_adapter *, u64 *, u16);
+ void (*change_l2_filter)(struct qlcnic_adapter *adapter, u64 *addr,
+ u16 vlan, struct qlcnic_host_tx_ring *tx_ring);
int (*get_board_info) (struct qlcnic_adapter *);
void (*set_mac_filter_count) (struct qlcnic_adapter *);
void (*free_mac_list) (struct qlcnic_adapter *);
@@ -2042,9 +2043,10 @@ static inline int qlcnic_nic_set_promisc(struct qlcnic_adapter *adapter,
}

static inline void qlcnic_change_filter(struct qlcnic_adapter *adapter,
- u64 *addr, u16 id)
+ u64 *addr, u16 vlan,
+ struct qlcnic_host_tx_ring *tx_ring)
{
- adapter->ahw->hw_ops->change_l2_filter(adapter, addr, id);
+ adapter->ahw->hw_ops->change_l2_filter(adapter, addr, vlan, tx_ring);
}

static inline int qlcnic_get_board_info(struct qlcnic_adapter *adapter)
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index c3c28f0960e5..05d32e86bcf7 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -2132,7 +2132,8 @@ int qlcnic_83xx_sre_macaddr_change(struct qlcnic_adapter *adapter, u8 *addr,
}

void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr,
- u16 vlan_id)
+ u16 vlan_id,
+ struct qlcnic_host_tx_ring *tx_ring)
{
u8 mac[ETH_ALEN];
memcpy(&mac, addr, ETH_ALEN);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
index 331ae2c20f40..c8e012b3f7e7 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
@@ -550,7 +550,8 @@ int qlcnic_83xx_wrt_reg_indirect(struct qlcnic_adapter *, ulong, u32);
int qlcnic_83xx_nic_set_promisc(struct qlcnic_adapter *, u32);
int qlcnic_83xx_config_hw_lro(struct qlcnic_adapter *, int);
int qlcnic_83xx_config_rss(struct qlcnic_adapter *, int);
-void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *, u64 *, u16);
+void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr,
+ u16 vlan, struct qlcnic_host_tx_ring *ring);
int qlcnic_83xx_get_pci_info(struct qlcnic_adapter *, struct qlcnic_pci_info *);
int qlcnic_83xx_set_nic_info(struct qlcnic_adapter *, struct qlcnic_info *);
void qlcnic_83xx_initialize_nic(struct qlcnic_adapter *, int);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h
index 4bb33af8e2b3..56a3bd9e37dc 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h
@@ -173,7 +173,8 @@ int qlcnic_82xx_napi_add(struct qlcnic_adapter *adapter,
struct net_device *netdev);
void qlcnic_82xx_get_beacon_state(struct qlcnic_adapter *);
void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter,
- u64 *uaddr, u16 vlan_id);
+ u64 *uaddr, u16 vlan_id,
+ struct qlcnic_host_tx_ring *tx_ring);
int qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *,
struct ethtool_coalesce *);
int qlcnic_82xx_set_rx_coalesce(struct qlcnic_adapter *);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index fedd7366713c..e36129401b71 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -268,13 +268,12 @@ static void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter,
}

void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr,
- u16 vlan_id)
+ u16 vlan_id, struct qlcnic_host_tx_ring *tx_ring)
{
struct cmd_desc_type0 *hwdesc;
struct qlcnic_nic_req *req;
struct qlcnic_mac_req *mac_req;
struct qlcnic_vlan_req *vlan_req;
- struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring;
u32 producer;
u64 word;

@@ -301,7 +300,8 @@ void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr,

static void qlcnic_send_filter(struct qlcnic_adapter *adapter,
struct cmd_desc_type0 *first_desc,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ struct qlcnic_host_tx_ring *tx_ring)
{
struct vlan_ethhdr *vh = (struct vlan_ethhdr *)(skb->data);
struct ethhdr *phdr = (struct ethhdr *)(skb->data);
@@ -335,7 +335,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter,
tmp_fil->vlan_id == vlan_id) {
if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime))
qlcnic_change_filter(adapter, &src_addr,
- vlan_id);
+ vlan_id, tx_ring);
tmp_fil->ftime = jiffies;
return;
}
@@ -350,7 +350,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter,
if (!fil)
return;

- qlcnic_change_filter(adapter, &src_addr, vlan_id);
+ qlcnic_change_filter(adapter, &src_addr, vlan_id, tx_ring);
fil->ftime = jiffies;
fil->vlan_id = vlan_id;
memcpy(fil->faddr, &src_addr, ETH_ALEN);
@@ -766,7 +766,7 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
}

if (adapter->drv_mac_learn)
- qlcnic_send_filter(adapter, first_desc, skb);
+ qlcnic_send_filter(adapter, first_desc, skb, tx_ring);

tx_ring->tx_stats.tx_bytes += skb->len;
tx_ring->tx_stats.xmit_called++;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 890e4b083f4f..2019e163e0e9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -71,7 +71,7 @@ static int dwmac1000_validate_mcast_bins(int mcast_bins)
* Description:
* This function validates the number of Unicast address entries supported
* by a particular Synopsys 10/100/1000 controller. The Synopsys controller
- * supports 1, 32, 64, or 128 Unicast filter entries for it's Unicast filter
+ * supports 1..32, 64, or 128 Unicast filter entries for it's Unicast filter
* logic. This function validates a valid, supported configuration is
* selected, and defaults to 1 Unicast address if an unsupported
* configuration is selected.
@@ -81,8 +81,7 @@ static int dwmac1000_validate_ucast_entries(int ucast_entries)
int x = ucast_entries;

switch (x) {
- case 1:
- case 32:
+ case 1 ... 32:
case 64:
case 128:
break;
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index f9ec00981b1e..9670aa23ffb9 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1171,6 +1171,11 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
return -EBUSY;
}

+ if (dev == port_dev) {
+ netdev_err(dev, "Cannot enslave team device to itself\n");
+ return -EINVAL;
+ }
+
if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
vlan_uses_dev(dev)) {
netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n",
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 0d4440f28f6b..2b728cc52e3a 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -934,6 +934,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)}, /* Olivetti Olicard 500 */
{QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */
{QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */
+ {QMI_FIXED_INTF(0x1e2d, 0x0063, 10)}, /* Cinterion ALASxx (1 RmNet) */
{QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */
{QMI_FIXED_INTF(0x1e2d, 0x0082, 5)}, /* Cinterion PHxx,PXxx (2 RmNet) */
{QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 03d04011d653..8d3f938c6a51 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -1518,6 +1518,7 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf)
{
struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
if (pdata) {
+ cancel_work_sync(&pdata->set_multicast);
netif_dbg(dev, ifdown, dev->net, "free pdata\n");
kfree(pdata);
pdata = NULL;
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index 0824a8164a24..07ea4fcf4f88 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -440,8 +440,8 @@ struct atio_from_isp {
static inline int fcpcmd_is_corrupted(struct atio *atio)
{
if (atio->entry_type == ATIO_TYPE7 &&
- (le16_to_cpu(atio->attr_n_length & FCP_CMD_LENGTH_MASK) <
- FCP_CMD_LENGTH_MIN))
+ ((le16_to_cpu(atio->attr_n_length) & FCP_CMD_LENGTH_MASK) <
+ FCP_CMD_LENGTH_MIN))
return 1;
else
return 0;
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 04d2b6e25503..80205f3362d4 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -1435,7 +1435,8 @@ static void iscsit_do_crypto_hash_buf(

sg_init_table(sg, ARRAY_SIZE(sg));
sg_set_buf(sg, buf, payload_length);
- sg_set_buf(sg + 1, pad_bytes, padding);
+ if (padding)
+ sg_set_buf(sg + 1, pad_bytes, padding);

ahash_request_set_crypt(hash, sg, data_crc, payload_length + padding);

@@ -3949,10 +3950,14 @@ static bool iscsi_target_check_conn_state(struct iscsi_conn *conn)
static void iscsit_get_rx_pdu(struct iscsi_conn *conn)
{
int ret;
- u8 buffer[ISCSI_HDR_LEN], opcode;
+ u8 *buffer, opcode;
u32 checksum = 0, digest = 0;
struct kvec iov;

+ buffer = kcalloc(ISCSI_HDR_LEN, sizeof(*buffer), GFP_KERNEL);
+ if (!buffer)
+ return;
+
while (!kthread_should_stop()) {
/*
* Ensure that both TX and RX per connection kthreads
@@ -3960,7 +3965,6 @@ static void iscsit_get_rx_pdu(struct iscsi_conn *conn)
*/
iscsit_thread_check_cpumask(conn, current, 0);

- memset(buffer, 0, ISCSI_HDR_LEN);
memset(&iov, 0, sizeof(struct kvec));

iov.iov_base = buffer;
@@ -3969,7 +3973,7 @@ static void iscsit_get_rx_pdu(struct iscsi_conn *conn)
ret = rx_data(conn, &iov, 1, ISCSI_HDR_LEN);
if (ret != ISCSI_HDR_LEN) {
iscsit_rx_thread_wait_for_tcp(conn);
- return;
+ break;
}

if (conn->conn_ops->HeaderDigest) {
@@ -3979,7 +3983,7 @@ static void iscsit_get_rx_pdu(struct iscsi_conn *conn)
ret = rx_data(conn, &iov, 1, ISCSI_CRC_LEN);
if (ret != ISCSI_CRC_LEN) {
iscsit_rx_thread_wait_for_tcp(conn);
- return;
+ break;
}

iscsit_do_crypto_hash_buf(conn->conn_rx_hash,
@@ -4003,7 +4007,7 @@ static void iscsit_get_rx_pdu(struct iscsi_conn *conn)
}

if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT)
- return;
+ break;

opcode = buffer[0] & ISCSI_OPCODE_MASK;

@@ -4014,13 +4018,15 @@ static void iscsit_get_rx_pdu(struct iscsi_conn *conn)
" while in Discovery Session, rejecting.\n", opcode);
iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR,
buffer);
- return;
+ break;
}

ret = iscsi_target_rx_opcode(conn, buffer);
if (ret < 0)
- return;
+ break;
}
+
+ kfree(buffer);
}

int iscsi_target_rx_thread(void *arg)
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 0722f75f1d6a..45a03eff4db1 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -1072,17 +1072,17 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
temp = readl(port_array[wIndex]);
break;
}
-
- /* Software should not attempt to set
- * port link state above '3' (U3) and the port
- * must be enabled.
- */
- if ((temp & PORT_PE) == 0 ||
- (link_state > USB_SS_PORT_LS_U3)) {
- xhci_warn(xhci, "Cannot set link state.\n");
+ /* Port must be enabled */
+ if (!(temp & PORT_PE)) {
+ retval = -ENODEV;
+ break;
+ }
+ /* Can't set port link state above '3' (U3) */
+ if (link_state > USB_SS_PORT_LS_U3) {
+ xhci_warn(xhci, "Cannot set port %d link state %d\n",
+ wIndex, link_state);
goto error;
}
-
if (link_state == USB_SS_PORT_LS_U3) {
slot_id = xhci_find_slot_id_by_port(hcd, xhci,
wIndex + 1);
diff --git a/drivers/video/fbdev/aty/atyfb.h b/drivers/video/fbdev/aty/atyfb.h
index 63c4842eb224..46e0e8b39b76 100644
--- a/drivers/video/fbdev/aty/atyfb.h
+++ b/drivers/video/fbdev/aty/atyfb.h
@@ -332,6 +332,8 @@ extern const struct aty_pll_ops aty_pll_ct; /* Integrated */
extern void aty_set_pll_ct(const struct fb_info *info, const union aty_pll *pll);
extern u8 aty_ld_pll_ct(int offset, const struct atyfb_par *par);

+extern const u8 aty_postdividers[8];
+

/*
* Hardware cursor support
@@ -358,7 +360,6 @@ static inline void wait_for_idle(struct atyfb_par *par)

extern void aty_reset_engine(const struct atyfb_par *par);
extern void aty_init_engine(struct atyfb_par *par, struct fb_info *info);
-extern u8 aty_ld_pll_ct(int offset, const struct atyfb_par *par);

void atyfb_copyarea(struct fb_info *info, const struct fb_copyarea *area);
void atyfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect);
diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c
index 81367cf0af77..da748c39196c 100644
--- a/drivers/video/fbdev/aty/atyfb_base.c
+++ b/drivers/video/fbdev/aty/atyfb_base.c
@@ -3093,17 +3093,18 @@ static int atyfb_setup_sparc(struct pci_dev *pdev, struct fb_info *info,
/*
* PLL Reference Divider M:
*/
- M = pll_regs[2];
+ M = pll_regs[PLL_REF_DIV];

/*
* PLL Feedback Divider N (Dependent on CLOCK_CNTL):
*/
- N = pll_regs[7 + (clock_cntl & 3)];
+ N = pll_regs[VCLK0_FB_DIV + (clock_cntl & 3)];

/*
* PLL Post Divider P (Dependent on CLOCK_CNTL):
*/
- P = 1 << (pll_regs[6] >> ((clock_cntl & 3) << 1));
+ P = aty_postdividers[((pll_regs[VCLK_POST_DIV] >> ((clock_cntl & 3) << 1)) & 3) |
+ ((pll_regs[PLL_EXT_CNTL] >> (2 + (clock_cntl & 3))) & 4)];

/*
* PLL Divider Q:
diff --git a/drivers/video/fbdev/aty/mach64_ct.c b/drivers/video/fbdev/aty/mach64_ct.c
index 51f29d627ceb..af54256a20a1 100644
--- a/drivers/video/fbdev/aty/mach64_ct.c
+++ b/drivers/video/fbdev/aty/mach64_ct.c
@@ -114,7 +114,7 @@ static void aty_st_pll_ct(int offset, u8 val, const struct atyfb_par *par)
*/

#define Maximum_DSP_PRECISION 7
-static u8 postdividers[] = {1,2,4,8,3};
+const u8 aty_postdividers[8] = {1,2,4,8,3,5,6,12};

static int aty_dsp_gt(const struct fb_info *info, u32 bpp, struct pll_ct *pll)
{
@@ -221,7 +221,7 @@ static int aty_valid_pll_ct(const struct fb_info *info, u32 vclk_per, struct pll
pll->vclk_post_div += (q < 64*8);
pll->vclk_post_div += (q < 32*8);
}
- pll->vclk_post_div_real = postdividers[pll->vclk_post_div];
+ pll->vclk_post_div_real = aty_postdividers[pll->vclk_post_div];
// pll->vclk_post_div <<= 6;
pll->vclk_fb_div = q * pll->vclk_post_div_real / 8;
pllvclk = (1000000 * 2 * pll->vclk_fb_div) /
@@ -512,7 +512,7 @@ static int aty_init_pll_ct(const struct fb_info *info, union aty_pll *pll)
u8 mclk_fb_div, pll_ext_cntl;
pll->ct.pll_ref_div = aty_ld_pll_ct(PLL_REF_DIV, par);
pll_ext_cntl = aty_ld_pll_ct(PLL_EXT_CNTL, par);
- pll->ct.xclk_post_div_real = postdividers[pll_ext_cntl & 0x07];
+ pll->ct.xclk_post_div_real = aty_postdividers[pll_ext_cntl & 0x07];
mclk_fb_div = aty_ld_pll_ct(MCLK_FB_DIV, par);
if (pll_ext_cntl & PLL_MFB_TIMES_4_2B)
mclk_fb_div <<= 1;
@@ -534,7 +534,7 @@ static int aty_init_pll_ct(const struct fb_info *info, union aty_pll *pll)
xpost_div += (q < 64*8);
xpost_div += (q < 32*8);
}
- pll->ct.xclk_post_div_real = postdividers[xpost_div];
+ pll->ct.xclk_post_div_real = aty_postdividers[xpost_div];
pll->ct.mclk_fb_div = q * pll->ct.xclk_post_div_real / 8;

#ifdef CONFIG_PPC
@@ -583,7 +583,7 @@ static int aty_init_pll_ct(const struct fb_info *info, union aty_pll *pll)
mpost_div += (q < 64*8);
mpost_div += (q < 32*8);
}
- sclk_post_div_real = postdividers[mpost_div];
+ sclk_post_div_real = aty_postdividers[mpost_div];
pll->ct.sclk_fb_div = q * sclk_post_div_real / 8;
pll->ct.spll_cntl2 = mpost_div << 4;
#ifdef DEBUG
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c10180d0b018..7d6da09e637b 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -657,7 +657,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s,
next = EXT4_XATTR_NEXT(last);
if ((void *)next >= s->end) {
EXT4_ERROR_INODE(inode, "corrupted xattr entries");
- return -EIO;
+ return -EFSCORRUPTED;
}
if (last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 47c7f5b8f675..f254982e1a8f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2338,6 +2338,13 @@ struct netdev_notifier_info {
struct net_device *dev;
};

+struct netdev_notifier_info_ext {
+ struct netdev_notifier_info info; /* must be first */
+ union {
+ u32 mtu;
+ } ext;
+};
+
struct netdev_notifier_change_info {
struct netdev_notifier_info info; /* must be first */
unsigned int flags_changed;
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 85d1ffc90285..4421e5ccb092 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -138,7 +138,6 @@ struct rhashtable_params {
/**
* struct rhashtable - Hash table handle
* @tbl: Bucket table
- * @nelems: Number of elements in table
* @key_len: Key length for hashfn
* @elasticity: Maximum chain length before rehash
* @p: Configuration parameters
@@ -146,10 +145,10 @@ struct rhashtable_params {
* @run_work: Deferred worker to expand/shrink asynchronously
* @mutex: Mutex to protect current/future table swapping
* @lock: Spin lock to protect walker list
+ * @nelems: Number of elements in table
*/
struct rhashtable {
struct bucket_table __rcu *tbl;
- atomic_t nelems;
unsigned int key_len;
unsigned int elasticity;
struct rhashtable_params p;
@@ -157,6 +156,7 @@ struct rhashtable {
struct work_struct run_work;
struct mutex mutex;
spinlock_t lock;
+ atomic_t nelems;
};

/**
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1f207dd22757..e90fe6b83e00 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -643,9 +643,14 @@ struct sk_buff {
struct skb_mstamp skb_mstamp;
};
};
- struct rb_node rbnode; /* used in netem & tcp stack */
+ struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */
};
- struct sock *sk;
+
+ union {
+ struct sock *sk;
+ int ip_defrag_offset;
+ };
+
struct net_device *dev;

/*
@@ -2413,7 +2418,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
kfree_skb(skb);
}

-void skb_rbtree_purge(struct rb_root *root);
+unsigned int skb_rbtree_purge(struct rb_root *root);

void *netdev_alloc_frag(unsigned int fragsz);

@@ -2949,6 +2954,7 @@ static inline unsigned char *skb_push_rcsum(struct sk_buff *skb,
return skb->data;
}

+int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len);
/**
* pskb_trim_rcsum - trim received skb and update checksum
* @skb: buffer to trim
@@ -2962,9 +2968,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
{
if (likely(len >= skb->len))
return 0;
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->ip_summed = CHECKSUM_NONE;
- return __pskb_trim(skb, len);
+ return pskb_trim_rcsum_slow(skb, len);
}

static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len)
@@ -2984,6 +2988,12 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)

#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)

+#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)
+#define skb_rb_first(root) rb_to_skb(rb_first(root))
+#define skb_rb_last(root) rb_to_skb(rb_last(root))
+#define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode))
+#define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode))
+
#define skb_queue_walk(queue, skb) \
for (skb = (queue)->next; \
skb != (struct sk_buff *)(queue); \
@@ -2998,6 +3008,18 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
for (; skb != (struct sk_buff *)(queue); \
skb = skb->next)

+#define skb_rbtree_walk(skb, root) \
+ for (skb = skb_rb_first(root); skb != NULL; \
+ skb = skb_rb_next(skb))
+
+#define skb_rbtree_walk_from(skb) \
+ for (; skb != NULL; \
+ skb = skb_rb_next(skb))
+
+#define skb_rbtree_walk_from_safe(skb, tmp) \
+ for (; tmp = skb ? skb_rb_next(skb) : NULL, (skb != NULL); \
+ skb = tmp)
+
#define skb_queue_walk_from_safe(queue, skb, tmp) \
for (tmp = skb->next; \
skb != (struct sk_buff *)(queue); \
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 714428c54c68..8750c2c4871a 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -139,12 +139,6 @@ struct bond_parm_tbl {
int mode;
};

-struct netdev_notify_work {
- struct delayed_work work;
- struct net_device *dev;
- struct netdev_bonding_info bonding_info;
-};
-
struct slave {
struct net_device *dev; /* first - useful for panic debug */
struct bonding *bond; /* our master */
@@ -171,6 +165,7 @@ struct slave {
#ifdef CONFIG_NET_POLL_CONTROLLER
struct netpoll *np;
#endif
+ struct delayed_work notify_work;
struct kobject kobj;
struct rtnl_link_stats64 slave_stats;
};
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 634d19203e7d..a3812e9c8fee 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -1,14 +1,20 @@
#ifndef __NET_FRAG_H__
#define __NET_FRAG_H__

+#include <linux/rhashtable.h>
+
struct netns_frags {
- /* Keep atomic mem on separate cachelines in structs that include it */
- atomic_t mem ____cacheline_aligned_in_smp;
/* sysctls */
+ long high_thresh;
+ long low_thresh;
int timeout;
- int high_thresh;
- int low_thresh;
int max_dist;
+ struct inet_frags *f;
+
+ struct rhashtable rhashtable ____cacheline_aligned_in_smp;
+
+ /* Keep atomic mem on separate cachelines in structs that include it */
+ atomic_long_t mem ____cacheline_aligned_in_smp;
};

/**
@@ -24,130 +30,115 @@ enum {
INET_FRAG_COMPLETE = BIT(2),
};

+struct frag_v4_compare_key {
+ __be32 saddr;
+ __be32 daddr;
+ u32 user;
+ u32 vif;
+ __be16 id;
+ u16 protocol;
+};
+
+struct frag_v6_compare_key {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ u32 user;
+ __be32 id;
+ u32 iif;
+};
+
/**
* struct inet_frag_queue - fragment queue
*
- * @lock: spinlock protecting the queue
+ * @node: rhash node
+ * @key: keys identifying this frag.
* @timer: queue expiration timer
- * @list: hash bucket list
+ * @lock: spinlock protecting this frag
* @refcnt: reference count of the queue
* @fragments: received fragments head
+ * @rb_fragments: received fragments rb-tree root
* @fragments_tail: received fragments tail
+ * @last_run_head: the head of the last "run". see ip_fragment.c
* @stamp: timestamp of the last received fragment
* @len: total length of the original datagram
* @meat: length of received fragments so far
* @flags: fragment queue flags
* @max_size: maximum received fragment size
* @net: namespace that this frag belongs to
- * @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
+ * @rcu: rcu head for freeing deferall
*/
struct inet_frag_queue {
- spinlock_t lock;
+ struct rhash_head node;
+ union {
+ struct frag_v4_compare_key v4;
+ struct frag_v6_compare_key v6;
+ } key;
struct timer_list timer;
- struct hlist_node list;
+ spinlock_t lock;
atomic_t refcnt;
- struct sk_buff *fragments;
+ struct sk_buff *fragments; /* Used in IPv6. */
+ struct rb_root rb_fragments; /* Used in IPv4. */
struct sk_buff *fragments_tail;
+ struct sk_buff *last_run_head;
ktime_t stamp;
int len;
int meat;
__u8 flags;
u16 max_size;
- struct netns_frags *net;
- struct hlist_node list_evictor;
-};
-
-#define INETFRAGS_HASHSZ 1024
-
-/* averaged:
- * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ /
- * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or
- * struct frag_queue))
- */
-#define INETFRAGS_MAXDEPTH 128
-
-struct inet_frag_bucket {
- struct hlist_head chain;
- spinlock_t chain_lock;
+ struct netns_frags *net;
+ struct rcu_head rcu;
};

struct inet_frags {
- struct inet_frag_bucket hash[INETFRAGS_HASHSZ];
-
- struct work_struct frags_work;
- unsigned int next_bucket;
- unsigned long last_rebuild_jiffies;
- bool rebuild;
-
- /* The first call to hashfn is responsible to initialize
- * rnd. This is best done with net_get_random_once.
- *
- * rnd_seqlock is used to let hash insertion detect
- * when it needs to re-lookup the hash chain to use.
- */
- u32 rnd;
- seqlock_t rnd_seqlock;
int qsize;

- unsigned int (*hashfn)(const struct inet_frag_queue *);
- bool (*match)(const struct inet_frag_queue *q,
- const void *arg);
void (*constructor)(struct inet_frag_queue *q,
const void *arg);
void (*destructor)(struct inet_frag_queue *);
void (*frag_expire)(unsigned long data);
struct kmem_cache *frags_cachep;
const char *frags_cache_name;
+ struct rhashtable_params rhash_params;
};

int inet_frags_init(struct inet_frags *);
void inet_frags_fini(struct inet_frags *);

-static inline void inet_frags_init_net(struct netns_frags *nf)
+static inline int inet_frags_init_net(struct netns_frags *nf)
{
- atomic_set(&nf->mem, 0);
+ atomic_long_set(&nf->mem, 0);
+ return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
}
-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
+void inet_frags_exit_net(struct netns_frags *nf);

-void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f);
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
- struct inet_frags *f, void *key, unsigned int hash);
+void inet_frag_kill(struct inet_frag_queue *q);
+void inet_frag_destroy(struct inet_frag_queue *q);
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);

-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
- const char *prefix);
+/* Free all skbs in the queue; return the sum of their truesizes. */
+unsigned int inet_frag_rbtree_purge(struct rb_root *root);

-static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
+static inline void inet_frag_put(struct inet_frag_queue *q)
{
if (atomic_dec_and_test(&q->refcnt))
- inet_frag_destroy(q, f);
-}
-
-static inline bool inet_frag_evicting(struct inet_frag_queue *q)
-{
- return !hlist_unhashed(&q->list_evictor);
+ inet_frag_destroy(q);
}

/* Memory Tracking Functions. */

-static inline int frag_mem_limit(struct netns_frags *nf)
-{
- return atomic_read(&nf->mem);
-}
-
-static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
+static inline long frag_mem_limit(const struct netns_frags *nf)
{
- atomic_sub(i, &nf->mem);
+ return atomic_long_read(&nf->mem);
}

-static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
+static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
{
- atomic_add(i, &nf->mem);
+ atomic_long_sub(val, &nf->mem);
}

-static inline int sum_frag_mem_limit(struct netns_frags *nf)
+static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
{
- return atomic_read(&nf->mem);
+ atomic_long_add(val, &nf->mem);
}

/* RFC 3168 support :
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 0464b207d0cf..6213a90a8cec 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -132,12 +132,6 @@ static inline int inet_request_bound_dev_if(const struct sock *sk,
return sk->sk_bound_dev_if;
}

-static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq)
-{
- return rcu_dereference_check(ireq->ireq_opt,
- atomic_read(&ireq->req.rsk_refcnt) > 0);
-}
-
struct inet_cork {
unsigned int flags;
__be32 addr;
diff --git a/include/net/ip.h b/include/net/ip.h
index bc9b4deeb60e..8646da034851 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -548,7 +548,6 @@ static inline struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *s
return skb;
}
#endif
-int ip_frag_mem(struct net *net);

/*
* Functions provided by ip_forward.c
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 978387d6c3e6..a6446d72c5d9 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -363,6 +363,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev);
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
int fib_sync_down_addr(struct net_device *dev, __be32 local);
int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
+void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);

extern u32 fib_multipath_secret __read_mostly;

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 64b0e9df31c7..7cb100d25bb5 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -330,13 +330,6 @@ static inline bool ipv6_accept_ra(struct inet6_dev *idev)
idev->cnf.accept_ra;
}

-#if IS_ENABLED(CONFIG_IPV6)
-static inline int ip6_frag_mem(struct net *net)
-{
- return sum_frag_mem_limit(&net->ipv6.frags);
-}
-#endif
-
#define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */
#define IPV6_FRAG_LOW_THRESH (3 * 1024*1024) /* 3145728 */
#define IPV6_FRAG_TIMEOUT (60 * HZ) /* 60 seconds */
@@ -530,17 +523,8 @@ enum ip6_defrag_users {
__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
};

-struct ip6_create_arg {
- __be32 id;
- u32 user;
- const struct in6_addr *src;
- const struct in6_addr *dst;
- int iif;
- u8 ecn;
-};
-
void ip6_frag_init(struct inet_frag_queue *q, const void *a);
-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a);
+extern const struct rhashtable_params ip6_rhash_params;

/*
* Equivalent of ipv4 struct ip
@@ -548,19 +532,13 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a);
struct frag_queue {
struct inet_frag_queue q;

- __be32 id; /* fragment id */
- u32 user;
- struct in6_addr saddr;
- struct in6_addr daddr;
-
int iif;
unsigned int csum;
__u16 nhoffset;
u8 ecn;
};

-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
- struct inet_frags *frags);
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq);

static inline bool ipv6_addr_any(const struct in6_addr *a)
{
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index e7a31f830690..3442a26d36d9 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -55,6 +55,7 @@ enum
IPSTATS_MIB_ECT1PKTS, /* InECT1Pkts */
IPSTATS_MIB_ECT0PKTS, /* InECT0Pkts */
IPSTATS_MIB_CEPKTS, /* InCEPkts */
+ IPSTATS_MIB_REASM_OVERLAPS, /* ReasmOverlaps */
__IPSTATS_MIB_MAX
};

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 101dac085c62..fdffd6232365 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -251,8 +251,10 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
if (!new_tbl)
return 0;

- for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
+ for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
rhashtable_rehash_chain(ht, old_hash);
+ cond_resched();
+ }

/* Publish the new table pointer. */
rcu_assign_pointer(ht->tbl, new_tbl);
@@ -993,6 +995,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
for (i = 0; i < tbl->size; i++) {
struct rhash_head *pos, *next;

+ cond_resched();
for (pos = rht_dereference(tbl->buckets[i], ht),
next = !rht_is_a_nulls(pos) ?
rht_dereference(pos->next, ht) : NULL;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d31e801a467c..5e6a4d76659d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1089,7 +1089,6 @@ const char * const vmstat_text[] = {
#ifdef CONFIG_DEBUG_VM_VMACACHE
"vmacache_find_calls",
"vmacache_find_hits",
- "vmacache_full_flushes",
#endif
#endif /* CONFIG_VM_EVENTS_COUNTERS */
};
diff --git a/net/core/dev.c b/net/core/dev.c
index b85e789044d5..15e3bb94156b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1664,6 +1664,28 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
}
EXPORT_SYMBOL(call_netdevice_notifiers);

+/**
+ * call_netdevice_notifiers_mtu - call all network notifier blocks
+ * @val: value passed unmodified to notifier function
+ * @dev: net_device pointer passed unmodified to notifier function
+ * @arg: additional u32 argument passed to the notifier function
+ *
+ * Call all network notifier blocks. Parameters and return value
+ * are as for raw_notifier_call_chain().
+ */
+static int call_netdevice_notifiers_mtu(unsigned long val,
+ struct net_device *dev, u32 arg)
+{
+ struct netdev_notifier_info_ext info = {
+ .info.dev = dev,
+ .ext.mtu = arg,
+ };
+
+ BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0);
+
+ return call_netdevice_notifiers_info(val, dev, &info.info);
+}
+
#ifdef CONFIG_NET_INGRESS
static struct static_key ingress_needed __read_mostly;

@@ -6589,14 +6611,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
err = __dev_set_mtu(dev, new_mtu);

if (!err) {
- err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+ err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
+ orig_mtu);
err = notifier_to_errno(err);
if (err) {
/* setting mtu back and notifying everyone again,
* so that they have a chance to revert changes.
*/
__dev_set_mtu(dev, orig_mtu);
- call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+ call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
+ new_mtu);
}
}
return err;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 194e844e1021..189082dc288d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2368,6 +2368,12 @@ struct net_device *rtnl_create_link(struct net *net,
else if (ops->get_num_rx_queues)
num_rx_queues = ops->get_num_rx_queues();

+ if (num_tx_queues < 1 || num_tx_queues > 4096)
+ return ERR_PTR(-EINVAL);
+
+ if (num_rx_queues < 1 || num_rx_queues > 4096)
+ return ERR_PTR(-EINVAL);
+
err = -ENOMEM;
dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type,
ops->setup, num_tx_queues, num_rx_queues);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 84c731aef0d8..038ec74fa131 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1578,6 +1578,20 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len)
}
EXPORT_SYMBOL(___pskb_trim);

+/* Note : use pskb_trim_rcsum() instead of calling this directly
+ */
+int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
+{
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ int delta = skb->len - len;
+
+ skb->csum = csum_sub(skb->csum,
+ skb_checksum(skb, len, delta, 0));
+ }
+ return __pskb_trim(skb, len);
+}
+EXPORT_SYMBOL(pskb_trim_rcsum_slow);
+
/**
* __pskb_pull_tail - advance tail of skb header
* @skb: buffer to reallocate
@@ -2425,20 +2439,27 @@ EXPORT_SYMBOL(skb_queue_purge);
/**
* skb_rbtree_purge - empty a skb rbtree
* @root: root of the rbtree to empty
+ * Return value: the sum of truesizes of all purged skbs.
*
* Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
* the list and one reference dropped. This function does not take
* any lock. Synchronization should be handled by the caller (e.g., TCP
* out-of-order queue is protected by the socket lock).
*/
-void skb_rbtree_purge(struct rb_root *root)
+unsigned int skb_rbtree_purge(struct rb_root *root)
{
- struct sk_buff *skb, *next;
+ struct rb_node *p = rb_first(root);
+ unsigned int sum = 0;

- rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode)
- kfree_skb(skb);
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);

- *root = RB_ROOT;
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+ sum += skb->truesize;
+ kfree_skb(skb);
+ }
+ return sum;
}

/**
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 4a05d7876850..84ff43acd427 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -605,11 +605,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (sk->sk_state == DCCP_LISTEN) {
if (dh->dccph_type == DCCP_PKT_REQUEST) {
/* It is possible that we process SYN packets from backlog,
- * so we need to make sure to disable BH right there.
+ * so we need to make sure to disable BH and RCU right there.
*/
+ rcu_read_lock();
local_bh_disable();
acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0;
local_bh_enable();
+ rcu_read_unlock();
if (!acceptable)
return 1;
consume_skb(skb);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 6697b180e122..28ad6f187e19 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -493,9 +493,11 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req

dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr,
ireq->ir_rmt_addr);
+ rcu_read_lock();
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq_opt_deref(ireq));
+ rcu_dereference(ireq->ireq_opt));
+ rcu_read_unlock();
err = net_xmit_eval(err);
}

diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index 5ac778962e4e..3bfec472734a 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -16,37 +16,19 @@ typedef unsigned __bitwise__ lowpan_rx_result;
#define LOWPAN_DISPATCH_FRAG1 0xc0
#define LOWPAN_DISPATCH_FRAGN 0xe0

-struct lowpan_create_arg {
+struct frag_lowpan_compare_key {
u16 tag;
u16 d_size;
- const struct ieee802154_addr *src;
- const struct ieee802154_addr *dst;
+ const struct ieee802154_addr src;
+ const struct ieee802154_addr dst;
};

-/* Equivalent of ipv4 struct ip
+/* Equivalent of ipv4 struct ipq
*/
struct lowpan_frag_queue {
struct inet_frag_queue q;
-
- u16 tag;
- u16 d_size;
- struct ieee802154_addr saddr;
- struct ieee802154_addr daddr;
};

-static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
-{
- switch (a->mode) {
- case IEEE802154_ADDR_LONG:
- return (((__force u64)a->extended_addr) >> 32) ^
- (((__force u64)a->extended_addr) & 0xffffffff);
- case IEEE802154_ADDR_SHORT:
- return (__force u32)(a->short_addr + (a->pan_id << 16));
- default:
- return 0;
- }
-}
-
int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type);
void lowpan_net_frag_exit(void);
int lowpan_net_frag_init(void);
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index f85b08baff16..6fca75581e13 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -37,47 +37,15 @@ static struct inet_frags lowpan_frags;
static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
struct sk_buff *prev, struct net_device *ldev);

-static unsigned int lowpan_hash_frag(u16 tag, u16 d_size,
- const struct ieee802154_addr *saddr,
- const struct ieee802154_addr *daddr)
-{
- net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd));
- return jhash_3words(ieee802154_addr_hash(saddr),
- ieee802154_addr_hash(daddr),
- (__force u32)(tag + (d_size << 16)),
- lowpan_frags.rnd);
-}
-
-static unsigned int lowpan_hashfn(const struct inet_frag_queue *q)
-{
- const struct lowpan_frag_queue *fq;
-
- fq = container_of(q, struct lowpan_frag_queue, q);
- return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr);
-}
-
-static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct lowpan_frag_queue *fq;
- const struct lowpan_create_arg *arg = a;
-
- fq = container_of(q, struct lowpan_frag_queue, q);
- return fq->tag == arg->tag && fq->d_size == arg->d_size &&
- ieee802154_addr_equal(&fq->saddr, arg->src) &&
- ieee802154_addr_equal(&fq->daddr, arg->dst);
-}
-
static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
{
- const struct lowpan_create_arg *arg = a;
+ const struct frag_lowpan_compare_key *key = a;
struct lowpan_frag_queue *fq;

fq = container_of(q, struct lowpan_frag_queue, q);

- fq->tag = arg->tag;
- fq->d_size = arg->d_size;
- fq->saddr = *arg->src;
- fq->daddr = *arg->dst;
+ BUILD_BUG_ON(sizeof(*key) > sizeof(q->key));
+ memcpy(&q->key, key, sizeof(*key));
}

static void lowpan_frag_expire(unsigned long data)
@@ -93,10 +61,10 @@ static void lowpan_frag_expire(unsigned long data)
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;

- inet_frag_kill(&fq->q, &lowpan_frags);
+ inet_frag_kill(&fq->q);
out:
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &lowpan_frags);
+ inet_frag_put(&fq->q);
}

static inline struct lowpan_frag_queue *
@@ -104,25 +72,20 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
const struct ieee802154_addr *src,
const struct ieee802154_addr *dst)
{
- struct inet_frag_queue *q;
- struct lowpan_create_arg arg;
- unsigned int hash;
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
+ struct frag_lowpan_compare_key key = {
+ .tag = cb->d_tag,
+ .d_size = cb->d_size,
+ .src = *src,
+ .dst = *dst,
+ };
+ struct inet_frag_queue *q;

- arg.tag = cb->d_tag;
- arg.d_size = cb->d_size;
- arg.src = src;
- arg.dst = dst;
-
- hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst);
-
- q = inet_frag_find(&ieee802154_lowpan->frags,
- &lowpan_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&ieee802154_lowpan->frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct lowpan_frag_queue, q);
}

@@ -229,7 +192,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
struct sk_buff *fp, *head = fq->q.fragments;
int sum_truesize;

- inet_frag_kill(&fq->q, &lowpan_frags);
+ inet_frag_kill(&fq->q);

/* Make the one we just received the head. */
if (prev) {
@@ -437,7 +400,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
ret = lowpan_frag_queue(fq, skb, frag_type);
spin_unlock(&fq->q.lock);

- inet_frag_put(&fq->q, &lowpan_frags);
+ inet_frag_put(&fq->q);
return ret;
}

@@ -447,24 +410,22 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
}

#ifdef CONFIG_SYSCTL
-static int zero;

static struct ctl_table lowpan_frags_ns_ctl_table[] = {
{
.procname = "6lowpanfrag_high_thresh",
.data = &init_net.ieee802154_lowpan.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
},
{
.procname = "6lowpanfrag_low_thresh",
.data = &init_net.ieee802154_lowpan.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
},
{
@@ -580,14 +541,20 @@ static int __net_init lowpan_frags_init_net(struct net *net)
{
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
+ int res;

ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
+ ieee802154_lowpan->frags.f = &lowpan_frags;

- inet_frags_init_net(&ieee802154_lowpan->frags);
-
- return lowpan_frags_ns_sysctl_register(net);
+ res = inet_frags_init_net(&ieee802154_lowpan->frags);
+ if (res < 0)
+ return res;
+ res = lowpan_frags_ns_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&ieee802154_lowpan->frags);
+ return res;
}

static void __net_exit lowpan_frags_exit_net(struct net *net)
@@ -596,7 +563,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
net_ieee802154_lowpan(net);

lowpan_frags_ns_sysctl_unregister(net);
- inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
+ inet_frags_exit_net(&ieee802154_lowpan->frags);
}

static struct pernet_operations lowpan_frags_ops = {
@@ -604,32 +571,63 @@ static struct pernet_operations lowpan_frags_ops = {
.exit = lowpan_frags_exit_net,
};

-int __init lowpan_net_frag_init(void)
+static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed)
{
- int ret;
+ return jhash2(data,
+ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+}

- ret = lowpan_frags_sysctl_register();
- if (ret)
- return ret;
+static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;

- ret = register_pernet_subsys(&lowpan_frags_ops);
- if (ret)
- goto err_pernet;
+ return jhash2((const u32 *)&fq->key,
+ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+}
+
+static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_lowpan_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static const struct rhashtable_params lowpan_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .hashfn = lowpan_key_hashfn,
+ .obj_hashfn = lowpan_obj_hashfn,
+ .obj_cmpfn = lowpan_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
+int __init lowpan_net_frag_init(void)
+{
+ int ret;

- lowpan_frags.hashfn = lowpan_hashfn;
lowpan_frags.constructor = lowpan_frag_init;
lowpan_frags.destructor = NULL;
lowpan_frags.qsize = sizeof(struct frag_queue);
- lowpan_frags.match = lowpan_frag_match;
lowpan_frags.frag_expire = lowpan_frag_expire;
lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
+ lowpan_frags.rhash_params = lowpan_rhash_params;
ret = inet_frags_init(&lowpan_frags);
if (ret)
- goto err_pernet;
+ goto out;

+ ret = lowpan_frags_sysctl_register();
+ if (ret)
+ goto err_sysctl;
+
+ ret = register_pernet_subsys(&lowpan_frags_ops);
+ if (ret)
+ goto err_pernet;
+out:
return ret;
err_pernet:
lowpan_frags_sysctl_unregister();
+err_sysctl:
+ inet_frags_fini(&lowpan_frags);
return ret;
}

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 6a2ef162088d..9364c39d0555 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1171,7 +1171,8 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct netdev_notifier_changeupper_info *info;
+ struct netdev_notifier_changeupper_info *upper_info = ptr;
+ struct netdev_notifier_info_ext *info_ext = ptr;
struct in_device *in_dev;
struct net *net = dev_net(dev);
unsigned int flags;
@@ -1206,16 +1207,19 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
fib_sync_up(dev, RTNH_F_LINKDOWN);
else
fib_sync_down_dev(dev, event, false);
- /* fall through */
+ rt_cache_flush(net);
+ break;
case NETDEV_CHANGEMTU:
+ fib_sync_mtu(dev, info_ext->ext.mtu);
rt_cache_flush(net);
break;
case NETDEV_CHANGEUPPER:
- info = ptr;
+ upper_info = ptr;
/* flush all routes if dev is linked to or unlinked from
* an L3 master device (e.g., VRF)
*/
- if (info->upper_dev && netif_is_l3_master(info->upper_dev))
+ if (upper_info->upper_dev &&
+ netif_is_l3_master(upper_info->upper_dev))
fib_disable_ip(dev, NETDEV_DOWN, true);
break;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a88dab33cdf6..90c654012510 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1377,6 +1377,56 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local)
return ret;
}

+/* Update the PMTU of exceptions when:
+ * - the new MTU of the first hop becomes smaller than the PMTU
+ * - the old MTU was the same as the PMTU, and it limited discovery of
+ * larger MTUs on the path. With that limit raised, we can now
+ * discover larger MTUs
+ * A special case is locked exceptions, for which the PMTU is smaller
+ * than the minimal accepted PMTU:
+ * - if the new MTU is greater than the PMTU, don't make any change
+ * - otherwise, unlock and set PMTU
+ */
+static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
+{
+ struct fnhe_hash_bucket *bucket;
+ int i;
+
+ bucket = rcu_dereference_protected(nh->nh_exceptions, 1);
+ if (!bucket)
+ return;
+
+ for (i = 0; i < FNHE_HASH_SIZE; i++) {
+ struct fib_nh_exception *fnhe;
+
+ for (fnhe = rcu_dereference_protected(bucket[i].chain, 1);
+ fnhe;
+ fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) {
+ if (fnhe->fnhe_mtu_locked) {
+ if (new <= fnhe->fnhe_pmtu) {
+ fnhe->fnhe_pmtu = new;
+ fnhe->fnhe_mtu_locked = false;
+ }
+ } else if (new < fnhe->fnhe_pmtu ||
+ orig == fnhe->fnhe_pmtu) {
+ fnhe->fnhe_pmtu = new;
+ }
+ }
+ }
+}
+
+void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
+{
+ unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+ struct hlist_head *head = &fib_info_devhash[hash];
+ struct fib_nh *nh;
+
+ hlist_for_each_entry(nh, head, nh_hash) {
+ if (nh->nh_dev == dev)
+ nh_update_mtu(nh, dev->mtu, orig_mtu);
+ }
+}
+
/* Event force Flags Description
* NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host
* NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d1cab49393e2..528a6777cda0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -410,7 +410,8 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
struct ip_options_rcu *opt;
struct rtable *rt;

- opt = ireq_opt_deref(ireq);
+ rcu_read_lock();
+ opt = rcu_dereference(ireq->ireq_opt);

flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
@@ -424,11 +425,13 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
goto no_route;
if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
goto route_err;
+ rcu_read_unlock();
return &rt->dst;

route_err:
ip_rt_put(rt);
no_route:
+ rcu_read_unlock();
__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index f8b41aaac76f..8323d33c0ce2 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,12 +25,6 @@
#include <net/inet_frag.h>
#include <net/inet_ecn.h>

-#define INETFRAGS_EVICT_BUCKETS 128
-#define INETFRAGS_EVICT_MAX 512
-
-/* don't rebuild inetfrag table with new secret more often than this */
-#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
-
/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
* Value : 0xff if frame should be dropped.
* 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
@@ -52,157 +46,8 @@ const u8 ip_frag_ecn_table[16] = {
};
EXPORT_SYMBOL(ip_frag_ecn_table);

-static unsigned int
-inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
-{
- return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
-}
-
-static bool inet_frag_may_rebuild(struct inet_frags *f)
-{
- return time_after(jiffies,
- f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
-}
-
-static void inet_frag_secret_rebuild(struct inet_frags *f)
-{
- int i;
-
- write_seqlock_bh(&f->rnd_seqlock);
-
- if (!inet_frag_may_rebuild(f))
- goto out;
-
- get_random_bytes(&f->rnd, sizeof(u32));
-
- for (i = 0; i < INETFRAGS_HASHSZ; i++) {
- struct inet_frag_bucket *hb;
- struct inet_frag_queue *q;
- struct hlist_node *n;
-
- hb = &f->hash[i];
- spin_lock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(q, n, &hb->chain, list) {
- unsigned int hval = inet_frag_hashfn(f, q);
-
- if (hval != i) {
- struct inet_frag_bucket *hb_dest;
-
- hlist_del(&q->list);
-
- /* Relink to new hash chain. */
- hb_dest = &f->hash[hval];
-
- /* This is the only place where we take
- * another chain_lock while already holding
- * one. As this will not run concurrently,
- * we cannot deadlock on hb_dest lock below, if its
- * already locked it will be released soon since
- * other caller cannot be waiting for hb lock
- * that we've taken above.
- */
- spin_lock_nested(&hb_dest->chain_lock,
- SINGLE_DEPTH_NESTING);
- hlist_add_head(&q->list, &hb_dest->chain);
- spin_unlock(&hb_dest->chain_lock);
- }
- }
- spin_unlock(&hb->chain_lock);
- }
-
- f->rebuild = false;
- f->last_rebuild_jiffies = jiffies;
-out:
- write_sequnlock_bh(&f->rnd_seqlock);
-}
-
-static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
-{
- if (!hlist_unhashed(&q->list_evictor))
- return false;
-
- return q->net->low_thresh == 0 ||
- frag_mem_limit(q->net) >= q->net->low_thresh;
-}
-
-static unsigned int
-inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
-{
- struct inet_frag_queue *fq;
- struct hlist_node *n;
- unsigned int evicted = 0;
- HLIST_HEAD(expired);
-
- spin_lock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
- if (!inet_fragq_should_evict(fq))
- continue;
-
- if (!del_timer(&fq->timer))
- continue;
-
- hlist_add_head(&fq->list_evictor, &expired);
- ++evicted;
- }
-
- spin_unlock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
- f->frag_expire((unsigned long) fq);
-
- return evicted;
-}
-
-static void inet_frag_worker(struct work_struct *work)
-{
- unsigned int budget = INETFRAGS_EVICT_BUCKETS;
- unsigned int i, evicted = 0;
- struct inet_frags *f;
-
- f = container_of(work, struct inet_frags, frags_work);
-
- BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
-
- local_bh_disable();
-
- for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
- evicted += inet_evict_bucket(f, &f->hash[i]);
- i = (i + 1) & (INETFRAGS_HASHSZ - 1);
- if (evicted > INETFRAGS_EVICT_MAX)
- break;
- }
-
- f->next_bucket = i;
-
- local_bh_enable();
-
- if (f->rebuild && inet_frag_may_rebuild(f))
- inet_frag_secret_rebuild(f);
-}
-
-static void inet_frag_schedule_worker(struct inet_frags *f)
-{
- if (unlikely(!work_pending(&f->frags_work)))
- schedule_work(&f->frags_work);
-}
-
int inet_frags_init(struct inet_frags *f)
{
- int i;
-
- INIT_WORK(&f->frags_work, inet_frag_worker);
-
- for (i = 0; i < INETFRAGS_HASHSZ; i++) {
- struct inet_frag_bucket *hb = &f->hash[i];
-
- spin_lock_init(&hb->chain_lock);
- INIT_HLIST_HEAD(&hb->chain);
- }
-
- seqlock_init(&f->rnd_seqlock);
- f->last_rebuild_jiffies = 0;
f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
NULL);
if (!f->frags_cachep)
@@ -214,83 +59,75 @@ EXPORT_SYMBOL(inet_frags_init);

void inet_frags_fini(struct inet_frags *f)
{
- cancel_work_sync(&f->frags_work);
+ /* We must wait that all inet_frag_destroy_rcu() have completed. */
+ rcu_barrier();
+
kmem_cache_destroy(f->frags_cachep);
+ f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);

-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
+static void inet_frags_free_cb(void *ptr, void *arg)
{
- unsigned int seq;
- int i;
-
- nf->low_thresh = 0;
-
-evict_again:
- local_bh_disable();
- seq = read_seqbegin(&f->rnd_seqlock);
+ struct inet_frag_queue *fq = ptr;

- for (i = 0; i < INETFRAGS_HASHSZ ; i++)
- inet_evict_bucket(f, &f->hash[i]);
-
- local_bh_enable();
- cond_resched();
-
- if (read_seqretry(&f->rnd_seqlock, seq) ||
- sum_frag_mem_limit(nf))
- goto evict_again;
-}
-EXPORT_SYMBOL(inet_frags_exit_net);
-
-static struct inet_frag_bucket *
-get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
-__acquires(hb->chain_lock)
-{
- struct inet_frag_bucket *hb;
- unsigned int seq, hash;
-
- restart:
- seq = read_seqbegin(&f->rnd_seqlock);
-
- hash = inet_frag_hashfn(f, fq);
- hb = &f->hash[hash];
+ /* If we can not cancel the timer, it means this frag_queue
+ * is already disappearing, we have nothing to do.
+ * Otherwise, we own a refcount until the end of this function.
+ */
+ if (!del_timer(&fq->timer))
+ return;

- spin_lock(&hb->chain_lock);
- if (read_seqretry(&f->rnd_seqlock, seq)) {
- spin_unlock(&hb->chain_lock);
- goto restart;
+ spin_lock_bh(&fq->lock);
+ if (!(fq->flags & INET_FRAG_COMPLETE)) {
+ fq->flags |= INET_FRAG_COMPLETE;
+ atomic_dec(&fq->refcnt);
}
+ spin_unlock_bh(&fq->lock);

- return hb;
+ inet_frag_put(fq);
}

-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+void inet_frags_exit_net(struct netns_frags *nf)
{
- struct inet_frag_bucket *hb;
+ nf->low_thresh = 0; /* prevent creation of new frags */

- hb = get_frag_bucket_locked(fq, f);
- hlist_del(&fq->list);
- fq->flags |= INET_FRAG_COMPLETE;
- spin_unlock(&hb->chain_lock);
+ rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
}
+EXPORT_SYMBOL(inet_frags_exit_net);

-void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
+void inet_frag_kill(struct inet_frag_queue *fq)
{
if (del_timer(&fq->timer))
atomic_dec(&fq->refcnt);

if (!(fq->flags & INET_FRAG_COMPLETE)) {
- fq_unlink(fq, f);
+ struct netns_frags *nf = fq->net;
+
+ fq->flags |= INET_FRAG_COMPLETE;
+ rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
atomic_dec(&fq->refcnt);
}
}
EXPORT_SYMBOL(inet_frag_kill);

-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
+static void inet_frag_destroy_rcu(struct rcu_head *head)
+{
+ struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
+ rcu);
+ struct inet_frags *f = q->net->f;
+
+ if (f->destructor)
+ f->destructor(q);
+ kmem_cache_free(f->frags_cachep, q);
+}
+
+void inet_frag_destroy(struct inet_frag_queue *q)
{
struct sk_buff *fp;
struct netns_frags *nf;
unsigned int sum, sum_truesize = 0;
+ struct inet_frags *f;

WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
WARN_ON(del_timer(&q->timer) != 0);
@@ -298,64 +135,35 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
/* Release all fragment data. */
fp = q->fragments;
nf = q->net;
- while (fp) {
- struct sk_buff *xp = fp->next;
-
- sum_truesize += fp->truesize;
- kfree_skb(fp);
- fp = xp;
+ f = nf->f;
+ if (fp) {
+ do {
+ struct sk_buff *xp = fp->next;
+
+ sum_truesize += fp->truesize;
+ kfree_skb(fp);
+ fp = xp;
+ } while (fp);
+ } else {
+ sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
}
sum = sum_truesize + f->qsize;

- if (f->destructor)
- f->destructor(q);
- kmem_cache_free(f->frags_cachep, q);
+ call_rcu(&q->rcu, inet_frag_destroy_rcu);

sub_frag_mem_limit(nf, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);

-static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
- struct inet_frag_queue *qp_in,
- struct inet_frags *f,
- void *arg)
-{
- struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
- struct inet_frag_queue *qp;
-
-#ifdef CONFIG_SMP
- /* With SMP race we have to recheck hash table, because
- * such entry could have been created on other cpu before
- * we acquired hash bucket lock.
- */
- hlist_for_each_entry(qp, &hb->chain, list) {
- if (qp->net == nf && f->match(qp, arg)) {
- atomic_inc(&qp->refcnt);
- spin_unlock(&hb->chain_lock);
- qp_in->flags |= INET_FRAG_COMPLETE;
- inet_frag_put(qp_in, f);
- return qp;
- }
- }
-#endif
- qp = qp_in;
- if (!mod_timer(&qp->timer, jiffies + nf->timeout))
- atomic_inc(&qp->refcnt);
-
- atomic_inc(&qp->refcnt);
- hlist_add_head(&qp->list, &hb->chain);
-
- spin_unlock(&hb->chain_lock);
-
- return qp;
-}
-
static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
struct inet_frags *f,
void *arg)
{
struct inet_frag_queue *q;

+ if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
+ return NULL;
+
q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
if (!q)
return NULL;
@@ -366,75 +174,50 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,

setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);
- atomic_set(&q->refcnt, 1);
+ atomic_set(&q->refcnt, 3);

return q;
}

static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
- struct inet_frags *f,
void *arg)
{
+ struct inet_frags *f = nf->f;
struct inet_frag_queue *q;
+ int err;

q = inet_frag_alloc(nf, f, arg);
if (!q)
return NULL;

- return inet_frag_intern(nf, q, f, arg);
-}
+ mod_timer(&q->timer, jiffies + nf->timeout);

-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
- struct inet_frags *f, void *key,
- unsigned int hash)
-{
- struct inet_frag_bucket *hb;
- struct inet_frag_queue *q;
- int depth = 0;
-
- if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) {
- inet_frag_schedule_worker(f);
+ err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
+ f->rhash_params);
+ if (err < 0) {
+ q->flags |= INET_FRAG_COMPLETE;
+ inet_frag_kill(q);
+ inet_frag_destroy(q);
return NULL;
}
-
- if (frag_mem_limit(nf) > nf->low_thresh)
- inet_frag_schedule_worker(f);
-
- hash &= (INETFRAGS_HASHSZ - 1);
- hb = &f->hash[hash];
-
- spin_lock(&hb->chain_lock);
- hlist_for_each_entry(q, &hb->chain, list) {
- if (q->net == nf && f->match(q, key)) {
- atomic_inc(&q->refcnt);
- spin_unlock(&hb->chain_lock);
- return q;
- }
- depth++;
- }
- spin_unlock(&hb->chain_lock);
-
- if (depth <= INETFRAGS_MAXDEPTH)
- return inet_frag_create(nf, f, key);
-
- if (inet_frag_may_rebuild(f)) {
- if (!f->rebuild)
- f->rebuild = true;
- inet_frag_schedule_worker(f);
- }
-
- return ERR_PTR(-ENOBUFS);
+ return q;
}
-EXPORT_SYMBOL(inet_frag_find);
+EXPORT_SYMBOL(inet_frag_create);

-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
- const char *prefix)
+/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
{
- static const char msg[] = "inet_frag_find: Fragment hash bucket"
- " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
- ". Dropping fragment.\n";
+ struct inet_frag_queue *fq;

- if (PTR_ERR(q) == -ENOBUFS)
- net_dbg_ratelimited("%s%s", prefix, msg);
+ rcu_read_lock();
+ fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+ if (fq) {
+ if (!atomic_inc_not_zero(&fq->refcnt))
+ fq = NULL;
+ rcu_read_unlock();
+ return fq;
+ }
+ rcu_read_unlock();
+ return inet_frag_create(nf, key);
}
-EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
+EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 752711cd4834..cc8c6ac84d08 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -56,27 +56,64 @@
*/
static const char ip_frag_cache_name[] = "ip4-frags";

-struct ipfrag_skb_cb
-{
+/* Use skb->cb to track consecutive/adjacent fragments coming at
+ * the end of the queue. Nodes in the rb-tree queue will
+ * contain "runs" of one or more adjacent fragments.
+ *
+ * Invariants:
+ * - next_frag is NULL at the tail of a "run";
+ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+ */
+struct ipfrag_skb_cb {
struct inet_skb_parm h;
- int offset;
+ struct sk_buff *next_frag;
+ int frag_run_len;
};

-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
+#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
+
+static void ip4_frag_init_run(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
+
+ FRAG_CB(skb)->next_frag = NULL;
+ FRAG_CB(skb)->frag_run_len = skb->len;
+}
+
+/* Append skb to the last "run". */
+static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
+ struct sk_buff *skb)
+{
+ RB_CLEAR_NODE(&skb->rbnode);
+ FRAG_CB(skb)->next_frag = NULL;
+
+ FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
+ FRAG_CB(q->fragments_tail)->next_frag = skb;
+ q->fragments_tail = skb;
+}
+
+/* Create a new "run" with the skb. */
+static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
+{
+ if (q->last_run_head)
+ rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+ &q->last_run_head->rbnode.rb_right);
+ else
+ rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+ rb_insert_color(&skb->rbnode, &q->rb_fragments);
+
+ ip4_frag_init_run(skb);
+ q->fragments_tail = skb;
+ q->last_run_head = skb;
+}

/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
struct inet_frag_queue q;

- u32 user;
- __be32 saddr;
- __be32 daddr;
- __be16 id;
- u8 protocol;
u8 ecn; /* RFC3168 support */
u16 max_df_size; /* largest frag with DF set seen */
int iif;
- int vif; /* L3 master device index */
unsigned int rid;
struct inet_peer *peer;
};
@@ -88,49 +125,9 @@ static u8 ip4_frag_ecn(u8 tos)

static struct inet_frags ip4_frags;

-int ip_frag_mem(struct net *net)
-{
- return sum_frag_mem_limit(&net->ipv4.frags);
-}
-
-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
- struct net_device *dev);
-
-struct ip4_create_arg {
- struct iphdr *iph;
- u32 user;
- int vif;
-};
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev);

-static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
-{
- net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
- return jhash_3words((__force u32)id << 16 | prot,
- (__force u32)saddr, (__force u32)daddr,
- ip4_frags.rnd);
-}
-
-static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
-{
- const struct ipq *ipq;
-
- ipq = container_of(q, struct ipq, q);
- return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
-}
-
-static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct ipq *qp;
- const struct ip4_create_arg *arg = a;
-
- qp = container_of(q, struct ipq, q);
- return qp->id == arg->iph->id &&
- qp->saddr == arg->iph->saddr &&
- qp->daddr == arg->iph->daddr &&
- qp->protocol == arg->iph->protocol &&
- qp->user == arg->user &&
- qp->vif == arg->vif;
-}

static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
@@ -139,17 +136,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
frags);
struct net *net = container_of(ipv4, struct net, ipv4);

- const struct ip4_create_arg *arg = a;
+ const struct frag_v4_compare_key *key = a;

- qp->protocol = arg->iph->protocol;
- qp->id = arg->iph->id;
- qp->ecn = ip4_frag_ecn(arg->iph->tos);
- qp->saddr = arg->iph->saddr;
- qp->daddr = arg->iph->daddr;
- qp->vif = arg->vif;
- qp->user = arg->user;
+ q->key.v4 = *key;
+ qp->ecn = 0;
qp->peer = q->net->max_dist ?
- inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
+ inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
NULL;
}

@@ -167,7 +159,7 @@ static void ip4_frag_free(struct inet_frag_queue *q)

static void ipq_put(struct ipq *ipq)
{
- inet_frag_put(&ipq->q, &ip4_frags);
+ inet_frag_put(&ipq->q);
}

/* Kill ipq entry. It is not destroyed immediately,
@@ -175,7 +167,7 @@ static void ipq_put(struct ipq *ipq)
*/
static void ipq_kill(struct ipq *ipq)
{
- inet_frag_kill(&ipq->q, &ip4_frags);
+ inet_frag_kill(&ipq->q);
}

static bool frag_expire_skip_icmp(u32 user)
@@ -192,8 +184,11 @@ static bool frag_expire_skip_icmp(u32 user)
*/
static void ip_expire(unsigned long arg)
{
- struct ipq *qp;
+ const struct iphdr *iph;
+ struct sk_buff *head = NULL;
struct net *net;
+ struct ipq *qp;
+ int err;

qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
net = container_of(qp->q.net, struct net, ipv4.frags);
@@ -206,51 +201,65 @@ static void ip_expire(unsigned long arg)

ipq_kill(qp);
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);

- if (!inet_frag_evicting(&qp->q)) {
- struct sk_buff *clone, *head = qp->q.fragments;
- const struct iphdr *iph;
- int err;
-
- __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
+ if (!(qp->q.flags & INET_FRAG_FIRST_IN))
+ goto out;

- if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
+ /* sk_buff::dev and sk_buff::rbnode are unionized. So we
+ * pull the head out of the tree in order to be able to
+ * deal with head->dev.
+ */
+ if (qp->q.fragments) {
+ head = qp->q.fragments;
+ qp->q.fragments = head->next;
+ } else {
+ head = skb_rb_first(&qp->q.rb_fragments);
+ if (!head)
goto out;
+ if (FRAG_CB(head)->next_frag)
+ rb_replace_node(&head->rbnode,
+ &FRAG_CB(head)->next_frag->rbnode,
+ &qp->q.rb_fragments);
+ else
+ rb_erase(&head->rbnode, &qp->q.rb_fragments);
+ memset(&head->rbnode, 0, sizeof(head->rbnode));
+ barrier();
+ }
+ if (head == qp->q.fragments_tail)
+ qp->q.fragments_tail = NULL;

- head->dev = dev_get_by_index_rcu(net, qp->iif);
- if (!head->dev)
- goto out;
+ sub_frag_mem_limit(qp->q.net, head->truesize);
+
+ head->dev = dev_get_by_index_rcu(net, qp->iif);
+ if (!head->dev)
+ goto out;


- /* skb has no dst, perform route lookup again */
- iph = ip_hdr(head);
- err = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ /* skb has no dst, perform route lookup again */
+ iph = ip_hdr(head);
+ err = ip_route_input_noref(head, iph->daddr, iph->saddr,
iph->tos, head->dev);
- if (err)
- goto out;
+ if (err)
+ goto out;

- /* Only an end host needs to send an ICMP
- * "Fragment Reassembly Timeout" message, per RFC792.
- */
- if (frag_expire_skip_icmp(qp->user) &&
- (skb_rtable(head)->rt_type != RTN_LOCAL))
- goto out;
+ /* Only an end host needs to send an ICMP
+ * "Fragment Reassembly Timeout" message, per RFC792.
+ */
+ if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
+ (skb_rtable(head)->rt_type != RTN_LOCAL))
+ goto out;

- clone = skb_clone(head, GFP_ATOMIC);
+ spin_unlock(&qp->q.lock);
+ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+ goto out_rcu_unlock;

- /* Send an ICMP "Fragment Reassembly Timeout" message. */
- if (clone) {
- spin_unlock(&qp->q.lock);
- icmp_send(clone, ICMP_TIME_EXCEEDED,
- ICMP_EXC_FRAGTIME, 0);
- consume_skb(clone);
- goto out_rcu_unlock;
- }
- }
out:
spin_unlock(&qp->q.lock);
out_rcu_unlock:
rcu_read_unlock();
+ if (head)
+ kfree_skb(head);
ipq_put(qp);
}

@@ -260,21 +269,20 @@ static void ip_expire(unsigned long arg)
static struct ipq *ip_find(struct net *net, struct iphdr *iph,
u32 user, int vif)
{
+ struct frag_v4_compare_key key = {
+ .saddr = iph->saddr,
+ .daddr = iph->daddr,
+ .user = user,
+ .vif = vif,
+ .id = iph->id,
+ .protocol = iph->protocol,
+ };
struct inet_frag_queue *q;
- struct ip4_create_arg arg;
- unsigned int hash;
-
- arg.iph = iph;
- arg.user = user;
- arg.vif = vif;

- hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
-
- q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&net->ipv4.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct ipq, q);
}

@@ -294,7 +302,7 @@ static int ip_frag_too_far(struct ipq *qp)
end = atomic_inc_return(&peer->rid);
qp->rid = end;

- rc = qp->q.fragments && (end - start) > max;
+ rc = qp->q.fragments_tail && (end - start) > max;

if (rc) {
struct net *net;
@@ -308,7 +316,6 @@ static int ip_frag_too_far(struct ipq *qp)

static int ip_frag_reinit(struct ipq *qp)
{
- struct sk_buff *fp;
unsigned int sum_truesize = 0;

if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
@@ -316,21 +323,16 @@ static int ip_frag_reinit(struct ipq *qp)
return -ETIMEDOUT;
}

- fp = qp->q.fragments;
- do {
- struct sk_buff *xp = fp->next;
-
- sum_truesize += fp->truesize;
- kfree_skb(fp);
- fp = xp;
- } while (fp);
+ sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
sub_frag_mem_limit(qp->q.net, sum_truesize);

qp->q.flags = 0;
qp->q.len = 0;
qp->q.meat = 0;
qp->q.fragments = NULL;
+ qp->q.rb_fragments = RB_ROOT;
qp->q.fragments_tail = NULL;
+ qp->q.last_run_head = NULL;
qp->iif = 0;
qp->ecn = 0;

@@ -340,7 +342,9 @@ static int ip_frag_reinit(struct ipq *qp)
/* Add new segment to existing queue. */
static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
- struct sk_buff *prev, *next;
+ struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+ struct rb_node **rbn, *parent;
+ struct sk_buff *skb1, *prev_tail;
struct net_device *dev;
unsigned int fragsize;
int flags, offset;
@@ -403,99 +407,61 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
if (err)
goto err;

- /* Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
- */
- prev = qp->q.fragments_tail;
- if (!prev || FRAG_CB(prev)->offset < offset) {
- next = NULL;
- goto found;
- }
- prev = NULL;
- for (next = qp->q.fragments; next != NULL; next = next->next) {
- if (FRAG_CB(next)->offset >= offset)
- break; /* bingo! */
- prev = next;
- }
-
-found:
- /* We found where to put this one. Check for overlap with
- * preceding fragment, and, if needed, align things so that
- * any overlaps are eliminated.
+ /* Note : skb->rbnode and skb->dev share the same location. */
+ dev = skb->dev;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+
+ /* RFC5722, Section 4, amended by Errata ID : 3089
+ * When reassembling an IPv6 datagram, if
+ * one or more its constituent fragments is determined to be an
+ * overlapping fragment, the entire datagram (and any constituent
+ * fragments) MUST be silently discarded.
+ *
+ * We do the same here for IPv4 (and increment an snmp counter).
*/
- if (prev) {
- int i = (FRAG_CB(prev)->offset + prev->len) - offset;

- if (i > 0) {
- offset += i;
- err = -EINVAL;
- if (end <= offset)
- goto err;
- err = -ENOMEM;
- if (!pskb_pull(skb, i))
- goto err;
- if (skb->ip_summed != CHECKSUM_UNNECESSARY)
- skb->ip_summed = CHECKSUM_NONE;
- }
- }
-
- err = -ENOMEM;
-
- while (next && FRAG_CB(next)->offset < end) {
- int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
-
- if (i < next->len) {
- int delta = -next->truesize;
-
- /* Eat head of the next overlapped fragment
- * and leave the loop. The next ones cannot overlap.
- */
- if (!pskb_pull(next, i))
- goto err;
- delta += next->truesize;
- if (delta)
- add_frag_mem_limit(qp->q.net, delta);
- FRAG_CB(next)->offset += i;
- qp->q.meat -= i;
- if (next->ip_summed != CHECKSUM_UNNECESSARY)
- next->ip_summed = CHECKSUM_NONE;
- break;
- } else {
- struct sk_buff *free_it = next;
-
- /* Old fragment is completely overridden with
- * new one drop it.
- */
- next = next->next;
-
- if (prev)
- prev->next = next;
- else
- qp->q.fragments = next;
-
- qp->q.meat -= free_it->len;
- sub_frag_mem_limit(qp->q.net, free_it->truesize);
- kfree_skb(free_it);
- }
+ /* Find out where to put this fragment. */
+ prev_tail = qp->q.fragments_tail;
+ if (!prev_tail)
+ ip4_frag_create_run(&qp->q, skb); /* First fragment. */
+ else if (prev_tail->ip_defrag_offset + prev_tail->len < end) {
+ /* This is the common case: skb goes to the end. */
+ /* Detect and discard overlaps. */
+ if (offset < prev_tail->ip_defrag_offset + prev_tail->len)
+ goto discard_qp;
+ if (offset == prev_tail->ip_defrag_offset + prev_tail->len)
+ ip4_frag_append_to_last_run(&qp->q, skb);
+ else
+ ip4_frag_create_run(&qp->q, skb);
+ } else {
+ /* Binary search. Note that skb can become the first fragment,
+ * but not the last (covered above).
+ */
+ rbn = &qp->q.rb_fragments.rb_node;
+ do {
+ parent = *rbn;
+ skb1 = rb_to_skb(parent);
+ if (end <= skb1->ip_defrag_offset)
+ rbn = &parent->rb_left;
+ else if (offset >= skb1->ip_defrag_offset +
+ FRAG_CB(skb1)->frag_run_len)
+ rbn = &parent->rb_right;
+ else /* Found an overlap with skb1. */
+ goto discard_qp;
+ } while (*rbn);
+ /* Here we have parent properly set, and rbn pointing to
+ * one of its NULL left/right children. Insert skb.
+ */
+ ip4_frag_init_run(skb);
+ rb_link_node(&skb->rbnode, parent, rbn);
+ rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
}

- FRAG_CB(skb)->offset = offset;
-
- /* Insert this fragment in the chain of fragments. */
- skb->next = next;
- if (!next)
- qp->q.fragments_tail = skb;
- if (prev)
- prev->next = skb;
- else
- qp->q.fragments = skb;
-
- dev = skb->dev;
- if (dev) {
+ if (dev)
qp->iif = dev->ifindex;
- skb->dev = NULL;
- }
+ skb->ip_defrag_offset = offset;
+
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
@@ -517,7 +483,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
unsigned long orefdst = skb->_skb_refdst;

skb->_skb_refdst = 0UL;
- err = ip_frag_reasm(qp, prev, dev);
+ err = ip_frag_reasm(qp, skb, prev_tail, dev);
skb->_skb_refdst = orefdst;
return err;
}
@@ -525,20 +491,24 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
skb_dst_drop(skb);
return -EINPROGRESS;

+discard_qp:
+ inet_frag_kill(&qp->q);
+ err = -EINVAL;
+ __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
err:
kfree_skb(skb);
return err;
}

-
/* Build a new IP datagram from all its fragments. */
-
-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
- struct net_device *dev)
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev)
{
struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
struct iphdr *iph;
- struct sk_buff *fp, *head = qp->q.fragments;
+ struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
+ struct sk_buff **nextp; /* To build frag_list. */
+ struct rb_node *rbn;
int len;
int ihlen;
int err;
@@ -552,26 +522,27 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
goto out_fail;
}
/* Make the one we just received the head. */
- if (prev) {
- head = prev->next;
- fp = skb_clone(head, GFP_ATOMIC);
+ if (head != skb) {
+ fp = skb_clone(skb, GFP_ATOMIC);
if (!fp)
goto out_nomem;
-
- fp->next = head->next;
- if (!fp->next)
+ FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
+ if (RB_EMPTY_NODE(&skb->rbnode))
+ FRAG_CB(prev_tail)->next_frag = fp;
+ else
+ rb_replace_node(&skb->rbnode, &fp->rbnode,
+ &qp->q.rb_fragments);
+ if (qp->q.fragments_tail == skb)
qp->q.fragments_tail = fp;
- prev->next = fp;
-
- skb_morph(head, qp->q.fragments);
- head->next = qp->q.fragments->next;
-
- consume_skb(qp->q.fragments);
- qp->q.fragments = head;
+ skb_morph(skb, head);
+ FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
+ rb_replace_node(&head->rbnode, &skb->rbnode,
+ &qp->q.rb_fragments);
+ consume_skb(head);
+ head = skb;
}

- WARN_ON(!head);
- WARN_ON(FRAG_CB(head)->offset != 0);
+ WARN_ON(head->ip_defrag_offset != 0);

/* Allocate a new buffer for the datagram. */
ihlen = ip_hdrlen(head);
@@ -595,35 +566,61 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
clone = alloc_skb(0, GFP_ATOMIC);
if (!clone)
goto out_nomem;
- clone->next = head->next;
- head->next = clone;
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
skb_frag_list_init(head);
for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
clone->len = clone->data_len = head->data_len - plen;
- head->data_len -= clone->len;
- head->len -= clone->len;
+ head->truesize += clone->truesize;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
add_frag_mem_limit(qp->q.net, clone->truesize);
+ skb_shinfo(head)->frag_list = clone;
+ nextp = &clone->next;
+ } else {
+ nextp = &skb_shinfo(head)->frag_list;
}

- skb_shinfo(head)->frag_list = head->next;
skb_push(head, head->data - skb_network_header(head));

- for (fp=head->next; fp; fp = fp->next) {
- head->data_len += fp->len;
- head->len += fp->len;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
- head->truesize += fp->truesize;
+ /* Traverse the tree in order, to build frag_list. */
+ fp = FRAG_CB(head)->next_frag;
+ rbn = rb_next(&head->rbnode);
+ rb_erase(&head->rbnode, &qp->q.rb_fragments);
+ while (rbn || fp) {
+ /* fp points to the next sk_buff in the current run;
+ * rbn points to the next run.
+ */
+ /* Go through the current run. */
+ while (fp) {
+ *nextp = fp;
+ nextp = &fp->next;
+ fp->prev = NULL;
+ memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+ fp->sk = NULL;
+ head->data_len += fp->len;
+ head->len += fp->len;
+ if (head->ip_summed != fp->ip_summed)
+ head->ip_summed = CHECKSUM_NONE;
+ else if (head->ip_summed == CHECKSUM_COMPLETE)
+ head->csum = csum_add(head->csum, fp->csum);
+ head->truesize += fp->truesize;
+ fp = FRAG_CB(fp)->next_frag;
+ }
+ /* Move to the next run. */
+ if (rbn) {
+ struct rb_node *rbnext = rb_next(rbn);
+
+ fp = rb_to_skb(rbn);
+ rb_erase(rbn, &qp->q.rb_fragments);
+ rbn = rbnext;
+ }
}
sub_frag_mem_limit(qp->q.net, head->truesize);

+ *nextp = NULL;
head->next = NULL;
+ head->prev = NULL;
head->dev = dev;
head->tstamp = qp->q.stamp;
IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
@@ -651,7 +648,9 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,

__IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
qp->q.fragments = NULL;
+ qp->q.rb_fragments = RB_ROOT;
qp->q.fragments_tail = NULL;
+ qp->q.last_run_head = NULL;
return 0;

out_nomem:
@@ -659,7 +658,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
err = -ENOMEM;
goto out_fail;
out_oversize:
- net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
+ net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr);
out_fail:
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
return err;
@@ -733,25 +732,46 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
}
EXPORT_SYMBOL(ip_check_defrag);

+unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+{
+ struct rb_node *p = rb_first(root);
+ unsigned int sum = 0;
+
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+ while (skb) {
+ struct sk_buff *next = FRAG_CB(skb)->next_frag;
+
+ sum += skb->truesize;
+ kfree_skb(skb);
+ skb = next;
+ }
+ }
+ return sum;
+}
+EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
#ifdef CONFIG_SYSCTL
-static int zero;
+static int dist_min;

static struct ctl_table ip4_frags_ns_ctl_table[] = {
{
.procname = "ipfrag_high_thresh",
.data = &init_net.ipv4.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv4.frags.low_thresh
},
{
.procname = "ipfrag_low_thresh",
.data = &init_net.ipv4.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.ipv4.frags.high_thresh
},
{
@@ -767,7 +787,7 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero
+ .extra1 = &dist_min,
},
{ }
};
@@ -849,6 +869,8 @@ static void __init ip4_frags_ctl_register(void)

static int __net_init ipv4_frags_init_net(struct net *net)
{
+ int res;
+
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
@@ -873,16 +895,21 @@ static int __net_init ipv4_frags_init_net(struct net *net)
net->ipv4.frags.timeout = IP_FRAG_TIME;

net->ipv4.frags.max_dist = 64;
-
- inet_frags_init_net(&net->ipv4.frags);
-
- return ip4_frags_ns_ctl_register(net);
+ net->ipv4.frags.f = &ip4_frags;
+
+ res = inet_frags_init_net(&net->ipv4.frags);
+ if (res < 0)
+ return res;
+ res = ip4_frags_ns_ctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->ipv4.frags);
+ return res;
}

static void __net_exit ipv4_frags_exit_net(struct net *net)
{
ip4_frags_ns_ctl_unregister(net);
- inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
+ inet_frags_exit_net(&net->ipv4.frags);
}

static struct pernet_operations ip4_frags_ops = {
@@ -890,17 +917,49 @@ static struct pernet_operations ip4_frags_ops = {
.exit = ipv4_frags_exit_net,
};

+
+static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed)
+{
+ return jhash2(data,
+ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+}
+
+static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
+
+ return jhash2((const u32 *)&fq->key.v4,
+ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+}
+
+static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_v4_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static const struct rhashtable_params ip4_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .key_offset = offsetof(struct inet_frag_queue, key),
+ .key_len = sizeof(struct frag_v4_compare_key),
+ .hashfn = ip4_key_hashfn,
+ .obj_hashfn = ip4_obj_hashfn,
+ .obj_cmpfn = ip4_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
void __init ipfrag_init(void)
{
- ip4_frags_ctl_register();
- register_pernet_subsys(&ip4_frags_ops);
- ip4_frags.hashfn = ip4_hashfn;
ip4_frags.constructor = ip4_frag_init;
ip4_frags.destructor = ip4_frag_free;
ip4_frags.qsize = sizeof(struct ipq);
- ip4_frags.match = ip4_frag_match;
ip4_frags.frag_expire = ip_expire;
ip4_frags.frags_cache_name = ip_frag_cache_name;
+ ip4_frags.rhash_params = ip4_rhash_params;
if (inet_frags_init(&ip4_frags))
panic("IP: failed to allocate ip4_frags cache\n");
+ ip4_frags_ctl_register();
+ register_pernet_subsys(&ip4_frags_ops);
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b21e435f428c..a5851c0bc278 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -134,7 +134,6 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
{
struct sockaddr_in sin;
- const struct iphdr *iph = ip_hdr(skb);
__be16 *ports;
int end;

@@ -149,7 +148,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
ports = (__be16 *)skb_transport_header(skb);

sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = iph->daddr;
+ sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
sin.sin_port = ports[1];
memset(sin.sin_zero, 0, sizeof(sin.sin_zero));

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index e1271e75e107..d8d99c21a9c1 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -627,6 +627,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, u8 protocol)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ unsigned int inner_nhdr_len = 0;
const struct iphdr *inner_iph;
struct flowi4 fl4;
u8 tos, ttl;
@@ -636,6 +637,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
__be32 dst;
bool connected;

+ /* ensure we can access the inner net header, for several users below */
+ if (skb->protocol == htons(ETH_P_IP))
+ inner_nhdr_len = sizeof(struct iphdr);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ inner_nhdr_len = sizeof(struct ipv6hdr);
+ if (unlikely(!pskb_may_pull(skb, inner_nhdr_len)))
+ goto tx_error;
+
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
connected = (tunnel->parms.iph.daddr != 0);

diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 7143ca1a6af9..ec48d8eafc7e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -54,7 +54,6 @@
static int sockstat_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
- unsigned int frag_mem;
int orphans, sockets;

local_bh_disable();
@@ -74,8 +73,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
sock_prot_inuse_get(net, &raw_prot));
- frag_mem = ip_frag_mem(net);
- seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ seq_printf(seq, "FRAG: inuse %u memory %lu\n",
+ atomic_read(&net->ipv4.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv4.frags));
return 0;
}

@@ -134,6 +134,7 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS),
+ SNMP_MIB_ITEM("ReasmOverlaps", IPSTATS_MIB_REASM_OVERLAPS),
SNMP_MIB_SENTINEL
};

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9d0b73aa649f..dbb153c6b21a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4406,7 +4406,7 @@ static void tcp_ofo_queue(struct sock *sk)

p = rb_first(&tp->out_of_order_queue);
while (p) {
- skb = rb_entry(p, struct sk_buff, rbnode);
+ skb = rb_to_skb(p);
if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;

@@ -4470,7 +4470,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct rb_node **p, *q, *parent;
+ struct rb_node **p, *parent;
struct sk_buff *skb1;
u32 seq, end_seq;
bool fragstolen;
@@ -4529,7 +4529,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
parent = NULL;
while (*p) {
parent = *p;
- skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ skb1 = rb_to_skb(parent);
if (before(seq, TCP_SKB_CB(skb1)->seq)) {
p = &parent->rb_left;
continue;
@@ -4574,9 +4574,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)

merge_right:
/* Remove other segments covered by skb. */
- while ((q = rb_next(&skb->rbnode)) != NULL) {
- skb1 = rb_entry(q, struct sk_buff, rbnode);
-
+ while ((skb1 = skb_rb_next(skb)) != NULL) {
if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
break;
if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
@@ -4591,7 +4589,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
tcp_drop(sk, skb1);
}
/* If there is no skb after us, we are the last_skb ! */
- if (!q)
+ if (!skb1)
tp->ooo_last_skb = skb;

add_sack:
@@ -4792,7 +4790,7 @@ static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *li
if (list)
return !skb_queue_is_last(list, skb) ? skb->next : NULL;

- return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+ return skb_rb_next(skb);
}

static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@ -4821,7 +4819,7 @@ static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)

while (*p) {
parent = *p;
- skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ skb1 = rb_to_skb(parent);
if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
p = &parent->rb_left;
else
@@ -4941,19 +4939,12 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
u32 range_truesize, sum_tiny = 0;
struct sk_buff *skb, *head;
- struct rb_node *p;
u32 start, end;

- p = rb_first(&tp->out_of_order_queue);
- skb = rb_entry_safe(p, struct sk_buff, rbnode);
+ skb = skb_rb_first(&tp->out_of_order_queue);
new_range:
if (!skb) {
- p = rb_last(&tp->out_of_order_queue);
- /* Note: This is possible p is NULL here. We do not
- * use rb_entry_safe(), as ooo_last_skb is valid only
- * if rbtree is not empty.
- */
- tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
+ tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
return;
}
start = TCP_SKB_CB(skb)->seq;
@@ -4961,7 +4952,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
range_truesize = skb->truesize;

for (head = skb;;) {
- skb = tcp_skb_next(skb, NULL);
+ skb = skb_rb_next(skb);

/* Range is terminated when we see a gap or when
* we are at the queue end.
@@ -5017,7 +5008,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
goal -= rb_to_skb(node)->truesize;
- tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
+ tcp_drop(sk, rb_to_skb(node));
if (!prev || goal <= 0) {
sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
@@ -5027,7 +5018,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
}
node = prev;
} while (node);
- tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+ tp->ooo_last_skb = rb_to_skb(prev);

/* Reset SACK state. A conforming SACK implementation will
* do the same at a timeout based retransmit. When a connection
@@ -5978,11 +5969,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (th->fin)
goto discard;
/* It is possible that we process SYN packets from backlog,
- * so we need to make sure to disable BH right there.
+ * so we need to make sure to disable BH and RCU right there.
*/
+ rcu_read_lock();
local_bh_disable();
acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
local_bh_enable();
+ rcu_read_unlock();

if (!acceptable)
return 1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 16dea67792e0..1ea0c91ba994 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -859,9 +859,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

+ rcu_read_lock();
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq_opt_deref(ireq));
+ rcu_dereference(ireq->ireq_opt));
+ rcu_read_unlock();
err = net_xmit_eval(err);
}

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3a27cf762da1..bc532206077f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4068,7 +4068,6 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
p++;
continue;
}
- state->offset++;
return ifa;
}

@@ -4092,13 +4091,12 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
return ifa;
}

+ state->offset = 0;
while (++state->bucket < IN6_ADDR_HSIZE) {
- state->offset = 0;
hlist_for_each_entry_rcu_bh(ifa,
&inet6_addr_lst[state->bucket], addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
- state->offset++;
return ifa;
}
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index cda63426eefb..fd081a14064e 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1226,7 +1226,7 @@ static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- const struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph;
int encap_limit = -1;
struct flowi6 fl6;
__u8 dsfield;
@@ -1234,6 +1234,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;

+ /* ensure we can access the full inner ip header */
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ return -1;
+
+ iph = ip_hdr(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

tproto = ACCESS_ONCE(t->parms.proto);
@@ -1293,7 +1298,7 @@ static inline int
ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct ipv6hdr *ipv6h;
int encap_limit = -1;
__u16 offset;
struct flowi6 fl6;
@@ -1302,6 +1307,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;

+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ return -1;
+
+ ipv6h = ipv6_hdr(skb);
tproto = ACCESS_ONCE(t->parms.proto);
if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
ip6_tnl_addr_conflict(t, ipv6h))
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index ee33a6743f3b..b9147558a8f2 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -63,7 +63,6 @@ struct nf_ct_frag6_skb_cb
static struct inet_frags nf_frags;

#ifdef CONFIG_SYSCTL
-static int zero;

static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
@@ -76,18 +75,17 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_low_thresh",
.data = &init_net.nf_frag.frags.low_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
.data = &init_net.nf_frag.frags.high_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
@@ -152,23 +150,6 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
}

-static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, nf_frags.rnd);
-}
-
-
-static unsigned int nf_hashfn(const struct inet_frag_queue *q)
-{
- const struct frag_queue *nq;
-
- nq = container_of(q, struct frag_queue, q);
- return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
-}
-
static void nf_ct_frag6_expire(unsigned long data)
{
struct frag_queue *fq;
@@ -177,34 +158,26 @@ static void nf_ct_frag6_expire(unsigned long data)
fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);

- ip6_expire_frag_queue(net, fq, &nf_frags);
+ ip6_expire_frag_queue(net, fq);
}

/* Creation primitives. */
-static inline struct frag_queue *fq_find(struct net *net, __be32 id,
- u32 user, struct in6_addr *src,
- struct in6_addr *dst, int iif, u8 ecn)
+static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
+ const struct ipv6hdr *hdr, int iif)
{
+ struct frag_v6_compare_key key = {
+ .id = id,
+ .saddr = hdr->saddr,
+ .daddr = hdr->daddr,
+ .user = user,
+ .iif = iif,
+ };
struct inet_frag_queue *q;
- struct ip6_create_arg arg;
- unsigned int hash;
-
- arg.id = id;
- arg.user = user;
- arg.src = src;
- arg.dst = dst;
- arg.iif = iif;
- arg.ecn = ecn;
-
- local_bh_disable();
- hash = nf_hash_frag(id, src, dst);
-
- q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
- local_bh_enable();
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+
+ q = inet_frag_find(&net->nf_frag.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct frag_queue, q);
}

@@ -263,7 +236,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this case. -DaveM
*/
pr_debug("end of fragment not rounded to 8 bytes.\n");
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
return -EPROTO;
}
if (end > fq->q.len) {
@@ -356,7 +329,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
return 0;

discard_fq:
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
err:
return -EINVAL;
}
@@ -378,7 +351,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
int payload_len;
u8 ecn;

- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);

WARN_ON(head == NULL);
WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
@@ -479,6 +452,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
+ fp->sk = NULL;
}
sub_frag_mem_limit(fq->q.net, head->truesize);

@@ -497,6 +471,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
head->csum);

fq->q.fragments = NULL;
+ fq->q.rb_fragments = RB_ROOT;
fq->q.fragments_tail = NULL;

return true;
@@ -591,9 +566,13 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);

+ if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
+ fhdr->frag_off & htons(IP6_MF))
+ return -EINVAL;
+
skb_orphan(skb);
- fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ fq = fq_find(net, fhdr->identification, user, hdr,
+ skb->dev ? skb->dev->ifindex : 0);
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
return -ENOMEM;
@@ -623,25 +602,33 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)

out_unlock:
spin_unlock_bh(&fq->q.lock);
- inet_frag_put(&fq->q, &nf_frags);
+ inet_frag_put(&fq->q);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);

static int nf_ct_net_init(struct net *net)
{
+ int res;
+
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- inet_frags_init_net(&net->nf_frag.frags);
-
- return nf_ct_frag6_sysctl_register(net);
+ net->nf_frag.frags.f = &nf_frags;
+
+ res = inet_frags_init_net(&net->nf_frag.frags);
+ if (res < 0)
+ return res;
+ res = nf_ct_frag6_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->nf_frag.frags);
+ return res;
}

static void nf_ct_net_exit(struct net *net)
{
nf_ct_frags6_sysctl_unregister(net);
- inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+ inet_frags_exit_net(&net->nf_frag.frags);
}

static struct pernet_operations nf_ct_net_ops = {
@@ -653,13 +640,12 @@ int nf_ct_frag6_init(void)
{
int ret = 0;

- nf_frags.hashfn = nf_hashfn;
nf_frags.constructor = ip6_frag_init;
nf_frags.destructor = NULL;
nf_frags.qsize = sizeof(struct frag_queue);
- nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
nf_frags.frags_cache_name = nf_frags_cache_name;
+ nf_frags.rhash_params = ip6_rhash_params;
ret = inet_frags_init(&nf_frags);
if (ret)
goto out;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index e88bcb8ff0fd..dc04c024986c 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -38,7 +38,6 @@
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
- unsigned int frag_mem = ip6_frag_mem(net);

seq_printf(seq, "TCP6: inuse %d\n",
sock_prot_inuse_get(net, &tcpv6_prot));
@@ -48,7 +47,9 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
- seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
+ atomic_read(&net->ipv6.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv6.frags));
return 0;
}

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 71ffa526cb23..a4f979ff31b9 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -645,8 +645,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->protocol = htons(ETH_P_IPV6);
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- skb_dst_set(skb, &rt->dst);
- *dstp = NULL;

skb_put(skb, length);
skb_reset_network_header(skb);
@@ -656,8 +654,14 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,

skb->transport_header = skb->network_header;
err = memcpy_from_msg(iph, msg, length);
- if (err)
- goto error_fault;
+ if (err) {
+ err = -EFAULT;
+ kfree_skb(skb);
+ goto error;
+ }
+
+ skb_dst_set(skb, &rt->dst);
+ *dstp = NULL;

/* if egress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -666,21 +670,28 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
if (unlikely(!skb))
return 0;

+ /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev
+ * in the error path. Since skb has been freed, the dst could
+ * have been queued for deletion.
+ */
+ rcu_read_lock();
IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, rt->dst.dev, dst_output);
if (err > 0)
err = net_xmit_errno(err);
- if (err)
- goto error;
+ if (err) {
+ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+ rcu_read_unlock();
+ goto error_check;
+ }
+ rcu_read_unlock();
out:
return 0;

-error_fault:
- err = -EFAULT;
- kfree_skb(skb);
error:
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+error_check:
if (err == -ENOBUFS && !np->recverr)
err = 0;
return err;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e585c0a2591c..74ffbcb306a6 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -79,94 +79,58 @@ static struct inet_frags ip6_frags;
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
struct net_device *dev);

-/*
- * callers should be careful not to use the hash value outside the ipfrag_lock
- * as doing so could race with ipfrag_hash_rnd being recalculated.
- */
-static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, ip6_frags.rnd);
-}
-
-static unsigned int ip6_hashfn(const struct inet_frag_queue *q)
-{
- const struct frag_queue *fq;
-
- fq = container_of(q, struct frag_queue, q);
- return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
-}
-
-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct frag_queue *fq;
- const struct ip6_create_arg *arg = a;
-
- fq = container_of(q, struct frag_queue, q);
- return fq->id == arg->id &&
- fq->user == arg->user &&
- ipv6_addr_equal(&fq->saddr, arg->src) &&
- ipv6_addr_equal(&fq->daddr, arg->dst) &&
- (arg->iif == fq->iif ||
- !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
- IPV6_ADDR_LINKLOCAL)));
-}
-EXPORT_SYMBOL(ip6_frag_match);
-
void ip6_frag_init(struct inet_frag_queue *q, const void *a)
{
struct frag_queue *fq = container_of(q, struct frag_queue, q);
- const struct ip6_create_arg *arg = a;
+ const struct frag_v6_compare_key *key = a;

- fq->id = arg->id;
- fq->user = arg->user;
- fq->saddr = *arg->src;
- fq->daddr = *arg->dst;
- fq->ecn = arg->ecn;
+ q->key.v6 = *key;
+ fq->ecn = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
- struct inet_frags *frags)
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
{
struct net_device *dev = NULL;
+ struct sk_buff *head;

+ rcu_read_lock();
spin_lock(&fq->q.lock);

if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;

- inet_frag_kill(&fq->q, frags);
+ inet_frag_kill(&fq->q);

- rcu_read_lock();
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
- goto out_rcu_unlock;
+ goto out;

__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
-
- if (inet_frag_evicting(&fq->q))
- goto out_rcu_unlock;
-
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);

/* Don't send error if the first segment did not arrive. */
- if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
- goto out_rcu_unlock;
+ head = fq->q.fragments;
+ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
+ goto out;

/* But use as source device on which LAST ARRIVED
* segment was received. And do not use fq->dev
* pointer directly, device might already disappeared.
*/
- fq->q.fragments->dev = dev;
- icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
-out_rcu_unlock:
- rcu_read_unlock();
+ head->dev = dev;
+ skb_get(head);
+ spin_unlock(&fq->q.lock);
+
+ icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+ kfree_skb(head);
+ goto out_rcu_unlock;
+
out:
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, frags);
+out_rcu_unlock:
+ rcu_read_unlock();
+ inet_frag_put(&fq->q);
}
EXPORT_SYMBOL(ip6_expire_frag_queue);

@@ -178,31 +142,29 @@ static void ip6_frag_expire(unsigned long data)
fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ipv6.frags);

- ip6_expire_frag_queue(net, fq, &ip6_frags);
+ ip6_expire_frag_queue(net, fq);
}

static struct frag_queue *
-fq_find(struct net *net, __be32 id, const struct in6_addr *src,
- const struct in6_addr *dst, int iif, u8 ecn)
+fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
{
+ struct frag_v6_compare_key key = {
+ .id = id,
+ .saddr = hdr->saddr,
+ .daddr = hdr->daddr,
+ .user = IP6_DEFRAG_LOCAL_DELIVER,
+ .iif = iif,
+ };
struct inet_frag_queue *q;
- struct ip6_create_arg arg;
- unsigned int hash;

- arg.id = id;
- arg.user = IP6_DEFRAG_LOCAL_DELIVER;
- arg.src = src;
- arg.dst = dst;
- arg.iif = iif;
- arg.ecn = ecn;
+ if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
+ IPV6_ADDR_LINKLOCAL)))
+ key.iif = 0;

- hash = inet6_hash_frag(id, src, dst);
-
- q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&net->ipv6.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct frag_queue, q);
}

@@ -359,7 +321,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
return -1;

discard_fq:
- inet_frag_kill(&fq->q, &ip6_frags);
+ inet_frag_kill(&fq->q);
err:
__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS);
@@ -386,7 +348,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
int sum_truesize;
u8 ecn;

- inet_frag_kill(&fq->q, &ip6_frags);
+ inet_frag_kill(&fq->q);

ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
@@ -504,6 +466,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
rcu_read_unlock();
fq->q.fragments = NULL;
+ fq->q.rb_fragments = RB_ROOT;
fq->q.fragments_tail = NULL;
return 1;

@@ -525,6 +488,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
struct frag_queue *fq;
const struct ipv6hdr *hdr = ipv6_hdr(skb);
struct net *net = dev_net(skb_dst(skb)->dev);
+ int iif;

if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
goto fail_hdr;
@@ -553,17 +517,22 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}

- fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
+ fhdr->frag_off & htons(IP6_MF))
+ goto fail_hdr;
+
+ iif = skb->dev ? skb->dev->ifindex : 0;
+ fq = fq_find(net, fhdr->identification, hdr, iif);
if (fq) {
int ret;

spin_lock(&fq->q.lock);

+ fq->iif = iif;
ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);

spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &ip6_frags);
+ inet_frag_put(&fq->q);
return ret;
}

@@ -584,24 +553,22 @@ static const struct inet6_protocol frag_protocol = {
};

#ifdef CONFIG_SYSCTL
-static int zero;

static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
.procname = "ip6frag_high_thresh",
.data = &init_net.ipv6.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv6.frags.low_thresh
},
{
.procname = "ip6frag_low_thresh",
.data = &init_net.ipv6.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.ipv6.frags.high_thresh
},
{
@@ -644,10 +611,6 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
table[1].data = &net->ipv6.frags.low_thresh;
table[1].extra2 = &net->ipv6.frags.high_thresh;
table[2].data = &net->ipv6.frags.timeout;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[0].procname = NULL;
}

hdr = register_net_sysctl(net, "net/ipv6", table);
@@ -709,19 +672,27 @@ static void ip6_frags_sysctl_unregister(void)

static int __net_init ipv6_frags_init_net(struct net *net)
{
+ int res;
+
net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+ net->ipv6.frags.f = &ip6_frags;

- inet_frags_init_net(&net->ipv6.frags);
+ res = inet_frags_init_net(&net->ipv6.frags);
+ if (res < 0)
+ return res;

- return ip6_frags_ns_sysctl_register(net);
+ res = ip6_frags_ns_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->ipv6.frags);
+ return res;
}

static void __net_exit ipv6_frags_exit_net(struct net *net)
{
ip6_frags_ns_sysctl_unregister(net);
- inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
+ inet_frags_exit_net(&net->ipv6.frags);
}

static struct pernet_operations ip6_frags_ops = {
@@ -729,14 +700,55 @@ static struct pernet_operations ip6_frags_ops = {
.exit = ipv6_frags_exit_net,
};

+static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed)
+{
+ return jhash2(data,
+ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
+
+ return jhash2((const u32 *)&fq->key.v6,
+ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_v6_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+const struct rhashtable_params ip6_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .hashfn = ip6_key_hashfn,
+ .obj_hashfn = ip6_obj_hashfn,
+ .obj_cmpfn = ip6_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+EXPORT_SYMBOL(ip6_rhash_params);
+
int __init ipv6_frag_init(void)
{
int ret;

- ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ ip6_frags.constructor = ip6_frag_init;
+ ip6_frags.destructor = NULL;
+ ip6_frags.qsize = sizeof(struct frag_queue);
+ ip6_frags.frag_expire = ip6_frag_expire;
+ ip6_frags.frags_cache_name = ip6_frag_cache_name;
+ ip6_frags.rhash_params = ip6_rhash_params;
+ ret = inet_frags_init(&ip6_frags);
if (ret)
goto out;

+ ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ if (ret)
+ goto err_protocol;
+
ret = ip6_frags_sysctl_register();
if (ret)
goto err_sysctl;
@@ -745,16 +757,6 @@ int __init ipv6_frag_init(void)
if (ret)
goto err_pernet;

- ip6_frags.hashfn = ip6_hashfn;
- ip6_frags.constructor = ip6_frag_init;
- ip6_frags.destructor = NULL;
- ip6_frags.qsize = sizeof(struct frag_queue);
- ip6_frags.match = ip6_frag_match;
- ip6_frags.frag_expire = ip6_frag_expire;
- ip6_frags.frags_cache_name = ip6_frag_cache_name;
- ret = inet_frags_init(&ip6_frags);
- if (ret)
- goto err_pernet;
out:
return ret;

@@ -762,6 +764,8 @@ int __init ipv6_frag_init(void)
ip6_frags_sysctl_unregister();
err_sysctl:
inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+err_protocol:
+ inet_frags_fini(&ip6_frags);
goto out;
}

diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index a123d0dc1ef9..053ba8646155 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -787,7 +787,8 @@ static int netlbl_unlabel_addrinfo_get(struct genl_info *info,
{
u32 addr_len;

- if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR]) {
+ if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] &&
+ info->attrs[NLBL_UNLABEL_A_IPV4MASK]) {
addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]);
if (addr_len != sizeof(struct in_addr) &&
addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK]))
diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c
index 8761877207ec..00c6af2ae1c2 100644
--- a/sound/hda/hdac_controller.c
+++ b/sound/hda/hdac_controller.c
@@ -40,6 +40,8 @@ static void azx_clear_corbrp(struct hdac_bus *bus)
*/
void snd_hdac_bus_init_cmd_io(struct hdac_bus *bus)
{
+ WARN_ON_ONCE(!bus->rb.area);
+
spin_lock_irq(&bus->reg_lock);
/* CORB set up */
bus->corb.addr = bus->rb.addr;
@@ -478,13 +480,15 @@ bool snd_hdac_bus_init_chip(struct hdac_bus *bus, bool full_reset)
/* reset controller */
azx_reset(bus, full_reset);

- /* initialize interrupts */
+ /* clear interrupts */
azx_int_clear(bus);
- azx_int_enable(bus);

/* initialize the codec command I/O */
snd_hdac_bus_init_cmd_io(bus);

+ /* enable interrupts after CORB/RIRB buffers are initialized above */
+ azx_int_enable(bus);
+
/* program the position buffer */
if (bus->use_posbuf && bus->posbuf.addr) {
snd_hdac_chip_writel(bus, DPLBASE, (u32)bus->posbuf.addr);
diff --git a/sound/soc/codecs/sigmadsp.c b/sound/soc/codecs/sigmadsp.c
index d53680ac78e4..6df158669420 100644
--- a/sound/soc/codecs/sigmadsp.c
+++ b/sound/soc/codecs/sigmadsp.c
@@ -117,8 +117,7 @@ static int sigmadsp_ctrl_write(struct sigmadsp *sigmadsp,
struct sigmadsp_control *ctrl, void *data)
{
/* safeload loads up to 20 bytes in a atomic operation */
- if (ctrl->num_bytes > 4 && ctrl->num_bytes <= 20 && sigmadsp->ops &&
- sigmadsp->ops->safeload)
+ if (ctrl->num_bytes <= 20 && sigmadsp->ops && sigmadsp->ops->safeload)
return sigmadsp->ops->safeload(sigmadsp, ctrl->addr, data,
ctrl->num_bytes);
else
diff --git a/sound/soc/codecs/wm8804-i2c.c b/sound/soc/codecs/wm8804-i2c.c
index f27464c2c5ba..79541960f45d 100644
--- a/sound/soc/codecs/wm8804-i2c.c
+++ b/sound/soc/codecs/wm8804-i2c.c
@@ -13,6 +13,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/i2c.h>
+#include <linux/acpi.h>

#include "wm8804.h"

@@ -40,17 +41,29 @@ static const struct i2c_device_id wm8804_i2c_id[] = {
};
MODULE_DEVICE_TABLE(i2c, wm8804_i2c_id);

+#if defined(CONFIG_OF)
static const struct of_device_id wm8804_of_match[] = {
{ .compatible = "wlf,wm8804", },
{ }
};
MODULE_DEVICE_TABLE(of, wm8804_of_match);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id wm8804_acpi_match[] = {
+ { "1AEC8804", 0 }, /* Wolfson PCI ID + part ID */
+ { "10138804", 0 }, /* Cirrus Logic PCI ID + part ID */
+ { },
+};
+MODULE_DEVICE_TABLE(acpi, wm8804_acpi_match);
+#endif

static struct i2c_driver wm8804_i2c_driver = {
.driver = {
.name = "wm8804",
.pm = &wm8804_pm,
- .of_match_table = wm8804_of_match,
+ .of_match_table = of_match_ptr(wm8804_of_match),
+ .acpi_match_table = ACPI_PTR(wm8804_acpi_match),
},
.probe = wm8804_i2c_probe,
.remove = wm8804_i2c_remove,
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index 7656ff8aa066..c001d5a91d22 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -204,14 +204,23 @@ from ctypes import *
libpq = CDLL("libpq.so.5")
PQconnectdb = libpq.PQconnectdb
PQconnectdb.restype = c_void_p
+PQconnectdb.argtypes = [ c_char_p ]
PQfinish = libpq.PQfinish
+PQfinish.argtypes = [ c_void_p ]
PQstatus = libpq.PQstatus
+PQstatus.restype = c_int
+PQstatus.argtypes = [ c_void_p ]
PQexec = libpq.PQexec
PQexec.restype = c_void_p
+PQexec.argtypes = [ c_void_p, c_char_p ]
PQresultStatus = libpq.PQresultStatus
+PQresultStatus.restype = c_int
+PQresultStatus.argtypes = [ c_void_p ]
PQputCopyData = libpq.PQputCopyData
+PQputCopyData.restype = c_int
PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ]
PQputCopyEnd = libpq.PQputCopyEnd
+PQputCopyEnd.restype = c_int
PQputCopyEnd.argtypes = [ c_void_p, c_void_p ]

sys.path.append(os.environ['PERF_EXEC_PATH'] + \
diff --git a/tools/testing/selftests/efivarfs/config b/tools/testing/selftests/efivarfs/config
new file mode 100644
index 000000000000..4e151f1005b2
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/config
@@ -0,0 +1 @@
+CONFIG_EFIVAR_FS=y
diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config
index 2fde30191a47..a7e8cd5bb265 100644
--- a/tools/testing/selftests/memory-hotplug/config
+++ b/tools/testing/selftests/memory-hotplug/config
@@ -2,3 +2,4 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTPLUG_SPARSE=y
CONFIG_NOTIFIER_ERROR_INJECTION=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
+CONFIG_MEMORY_HOTREMOVE=y