Re: Linux 4.0.1

From: Greg KH
Date: Wed Apr 29 2015 - 04:56:30 EST


diff --git a/Makefile b/Makefile
index fbd43bfe4445..f499cd2f5738 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 0
-SUBLEVEL = 0
+SUBLEVEL = 1
EXTRAVERSION =
NAME = Hurr durr I'ma sheep

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 4085c4b31047..355d5fea5be9 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -531,20 +531,8 @@ struct bnx2x_fastpath {
struct napi_struct napi;

#ifdef CONFIG_NET_RX_BUSY_POLL
- unsigned int state;
-#define BNX2X_FP_STATE_IDLE 0
-#define BNX2X_FP_STATE_NAPI (1 << 0) /* NAPI owns this FP */
-#define BNX2X_FP_STATE_POLL (1 << 1) /* poll owns this FP */
-#define BNX2X_FP_STATE_DISABLED (1 << 2)
-#define BNX2X_FP_STATE_NAPI_YIELD (1 << 3) /* NAPI yielded this FP */
-#define BNX2X_FP_STATE_POLL_YIELD (1 << 4) /* poll yielded this FP */
-#define BNX2X_FP_OWNED (BNX2X_FP_STATE_NAPI | BNX2X_FP_STATE_POLL)
-#define BNX2X_FP_YIELD (BNX2X_FP_STATE_NAPI_YIELD | BNX2X_FP_STATE_POLL_YIELD)
-#define BNX2X_FP_LOCKED (BNX2X_FP_OWNED | BNX2X_FP_STATE_DISABLED)
-#define BNX2X_FP_USER_PEND (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_POLL_YIELD)
- /* protect state */
- spinlock_t lock;
-#endif /* CONFIG_NET_RX_BUSY_POLL */
+ unsigned long busy_poll_state;
+#endif

union host_hc_status_block status_blk;
/* chip independent shortcuts into sb structure */
@@ -619,104 +607,83 @@ struct bnx2x_fastpath {
#define bnx2x_fp_qstats(bp, fp) (&((bp)->fp_stats[(fp)->index].eth_q_stats))

#ifdef CONFIG_NET_RX_BUSY_POLL
-static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
+
+enum bnx2x_fp_state {
+ BNX2X_STATE_FP_NAPI = BIT(0), /* NAPI handler owns the queue */
+
+ BNX2X_STATE_FP_NAPI_REQ_BIT = 1, /* NAPI would like to own the queue */
+ BNX2X_STATE_FP_NAPI_REQ = BIT(1),
+
+ BNX2X_STATE_FP_POLL_BIT = 2,
+ BNX2X_STATE_FP_POLL = BIT(2), /* busy_poll owns the queue */
+
+ BNX2X_STATE_FP_DISABLE_BIT = 3, /* queue is dismantled */
+};
+
+static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp)
{
- spin_lock_init(&fp->lock);
- fp->state = BNX2X_FP_STATE_IDLE;
+ WRITE_ONCE(fp->busy_poll_state, 0);
}

/* called from the device poll routine to get ownership of a FP */
static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
{
- bool rc = true;
-
- spin_lock_bh(&fp->lock);
- if (fp->state & BNX2X_FP_LOCKED) {
- WARN_ON(fp->state & BNX2X_FP_STATE_NAPI);
- fp->state |= BNX2X_FP_STATE_NAPI_YIELD;
- rc = false;
- } else {
- /* we don't care if someone yielded */
- fp->state = BNX2X_FP_STATE_NAPI;
+ unsigned long prev, old = READ_ONCE(fp->busy_poll_state);
+
+ while (1) {
+ switch (old) {
+ case BNX2X_STATE_FP_POLL:
+ /* make sure bnx2x_fp_lock_poll() wont starve us */
+ set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT,
+ &fp->busy_poll_state);
+ /* fallthrough */
+ case BNX2X_STATE_FP_POLL | BNX2X_STATE_FP_NAPI_REQ:
+ return false;
+ default:
+ break;
+ }
+ prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI);
+ if (unlikely(prev != old)) {
+ old = prev;
+ continue;
+ }
+ return true;
}
- spin_unlock_bh(&fp->lock);
- return rc;
}

-/* returns true is someone tried to get the FP while napi had it */
-static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
{
- bool rc = false;
-
- spin_lock_bh(&fp->lock);
- WARN_ON(fp->state &
- (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_NAPI_YIELD));
-
- if (fp->state & BNX2X_FP_STATE_POLL_YIELD)
- rc = true;
-
- /* state ==> idle, unless currently disabled */
- fp->state &= BNX2X_FP_STATE_DISABLED;
- spin_unlock_bh(&fp->lock);
- return rc;
+ smp_wmb();
+ fp->busy_poll_state = 0;
}

/* called from bnx2x_low_latency_poll() */
static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
{
- bool rc = true;
-
- spin_lock_bh(&fp->lock);
- if ((fp->state & BNX2X_FP_LOCKED)) {
- fp->state |= BNX2X_FP_STATE_POLL_YIELD;
- rc = false;
- } else {
- /* preserve yield marks */
- fp->state |= BNX2X_FP_STATE_POLL;
- }
- spin_unlock_bh(&fp->lock);
- return rc;
+ return cmpxchg(&fp->busy_poll_state, 0, BNX2X_STATE_FP_POLL) == 0;
}

-/* returns true if someone tried to get the FP while it was locked */
-static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
{
- bool rc = false;
-
- spin_lock_bh(&fp->lock);
- WARN_ON(fp->state & BNX2X_FP_STATE_NAPI);
-
- if (fp->state & BNX2X_FP_STATE_POLL_YIELD)
- rc = true;
-
- /* state ==> idle, unless currently disabled */
- fp->state &= BNX2X_FP_STATE_DISABLED;
- spin_unlock_bh(&fp->lock);
- return rc;
+ smp_mb__before_atomic();
+ clear_bit(BNX2X_STATE_FP_POLL_BIT, &fp->busy_poll_state);
}

-/* true if a socket is polling, even if it did not get the lock */
+/* true if a socket is polling */
static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
{
- WARN_ON(!(fp->state & BNX2X_FP_OWNED));
- return fp->state & BNX2X_FP_USER_PEND;
+ return READ_ONCE(fp->busy_poll_state) & BNX2X_STATE_FP_POLL;
}

/* false if fp is currently owned */
static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp)
{
- int rc = true;
-
- spin_lock_bh(&fp->lock);
- if (fp->state & BNX2X_FP_OWNED)
- rc = false;
- fp->state |= BNX2X_FP_STATE_DISABLED;
- spin_unlock_bh(&fp->lock);
+ set_bit(BNX2X_STATE_FP_DISABLE_BIT, &fp->busy_poll_state);
+ return !bnx2x_fp_ll_polling(fp);

- return rc;
}
#else
-static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp)
{
}

@@ -725,9 +692,8 @@ static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
return true;
}

-static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
{
- return false;
}

static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
@@ -735,9 +701,8 @@ static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
return false;
}

-static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
{
- return false;
}

static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 0a9faa134a9a..2f63467bce46 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1849,7 +1849,7 @@ static void bnx2x_napi_enable_cnic(struct bnx2x *bp)
int i;

for_each_rx_queue_cnic(bp, i) {
- bnx2x_fp_init_lock(&bp->fp[i]);
+ bnx2x_fp_busy_poll_init(&bp->fp[i]);
napi_enable(&bnx2x_fp(bp, i, napi));
}
}
@@ -1859,7 +1859,7 @@ static void bnx2x_napi_enable(struct bnx2x *bp)
int i;

for_each_eth_queue(bp, i) {
- bnx2x_fp_init_lock(&bp->fp[i]);
+ bnx2x_fp_busy_poll_init(&bp->fp[i]);
napi_enable(&bnx2x_fp(bp, i, napi));
}
}
@@ -3191,9 +3191,10 @@ static int bnx2x_poll(struct napi_struct *napi, int budget)
}
}

+ bnx2x_fp_unlock_napi(fp);
+
/* Fall out from the NAPI loop if needed */
- if (!bnx2x_fp_unlock_napi(fp) &&
- !(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
+ if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {

/* No need to update SB for FCoE L2 ring as long as
* it's connected to the default SB and the SB
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index f8528a4cf54f..fceb637efd6b 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1713,12 +1713,6 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb,
}
}

- skb = iptunnel_handle_offloads(skb, udp_sum, type);
- if (IS_ERR(skb)) {
- err = -EINVAL;
- goto err;
- }
-
skb_scrub_packet(skb, xnet);

min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
@@ -1738,6 +1732,12 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb,
goto err;
}

+ skb = iptunnel_handle_offloads(skb, udp_sum, type);
+ if (IS_ERR(skb)) {
+ err = -EINVAL;
+ goto err;
+ }
+
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI);
vxh->vx_vni = md->vni;
@@ -1798,10 +1798,6 @@ int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb,
}
}

- skb = iptunnel_handle_offloads(skb, udp_sum, type);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ VXLAN_HLEN + sizeof(struct iphdr)
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
@@ -1817,6 +1813,10 @@ int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb,
if (WARN_ON(!skb))
return -ENOMEM;

+ skb = iptunnel_handle_offloads(skb, udp_sum, type);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI);
vxh->vx_vni = md->vni;
diff --git a/fs/exec.c b/fs/exec.c
index c7f9b733406d..00400cf522dc 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1265,6 +1265,53 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
spin_unlock(&p->fs->lock);
}

+static void bprm_fill_uid(struct linux_binprm *bprm)
+{
+ struct inode *inode;
+ unsigned int mode;
+ kuid_t uid;
+ kgid_t gid;
+
+ /* clear any previous set[ug]id data from a previous binary */
+ bprm->cred->euid = current_euid();
+ bprm->cred->egid = current_egid();
+
+ if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+ return;
+
+ if (task_no_new_privs(current))
+ return;
+
+ inode = file_inode(bprm->file);
+ mode = READ_ONCE(inode->i_mode);
+ if (!(mode & (S_ISUID|S_ISGID)))
+ return;
+
+ /* Be careful if suid/sgid is set */
+ mutex_lock(&inode->i_mutex);
+
+ /* reload atomically mode/uid/gid now that lock held */
+ mode = inode->i_mode;
+ uid = inode->i_uid;
+ gid = inode->i_gid;
+ mutex_unlock(&inode->i_mutex);
+
+ /* We ignore suid/sgid if there are no mappings for them in the ns */
+ if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
+ !kgid_has_mapping(bprm->cred->user_ns, gid))
+ return;
+
+ if (mode & S_ISUID) {
+ bprm->per_clear |= PER_CLEAR_ON_SETID;
+ bprm->cred->euid = uid;
+ }
+
+ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
+ bprm->per_clear |= PER_CLEAR_ON_SETID;
+ bprm->cred->egid = gid;
+ }
+}
+
/*
* Fill the binprm structure from the inode.
* Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
@@ -1273,36 +1320,9 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
*/
int prepare_binprm(struct linux_binprm *bprm)
{
- struct inode *inode = file_inode(bprm->file);
- umode_t mode = inode->i_mode;
int retval;

-
- /* clear any previous set[ug]id data from a previous binary */
- bprm->cred->euid = current_euid();
- bprm->cred->egid = current_egid();
-
- if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
- !task_no_new_privs(current) &&
- kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
- kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
- /* Set-uid? */
- if (mode & S_ISUID) {
- bprm->per_clear |= PER_CLEAR_ON_SETID;
- bprm->cred->euid = inode->i_uid;
- }
-
- /* Set-gid? */
- /*
- * If setgid is set but no group execute bit then this
- * is a candidate for mandatory locking, not a setgid
- * executable.
- */
- if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
- bprm->per_clear |= PER_CLEAR_ON_SETID;
- bprm->cred->egid = inode->i_gid;
- }
- }
+ bprm_fill_uid(bprm);

/* fill in binprm security blob */
retval = security_bprm_set_creds(bprm);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a28e09c7825d..36508e69e92a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1380,7 +1380,8 @@ peek_stack:
/* tell verifier to check for equivalent states
* after every call and jump
*/
- env->explored_states[t + 1] = STATE_LIST_MARK;
+ if (t + 1 < insn_cnt)
+ env->explored_states[t + 1] = STATE_LIST_MARK;
} else {
/* conditional jump with two edges */
ret = push_insn(t, t + 1, FALLTHROUGH, env);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8e4ac97c8477..98d45fe72f51 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4169,19 +4169,21 @@ EXPORT_SYMBOL(skb_try_coalesce);
*/
void skb_scrub_packet(struct sk_buff *skb, bool xnet)
{
- if (xnet)
- skb_orphan(skb);
skb->tstamp.tv64 = 0;
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
skb->ignore_df = 0;
skb_dst_drop(skb);
- skb->mark = 0;
skb_sender_cpu_clear(skb);
- skb_init_secmark(skb);
secpath_reset(skb);
nf_reset(skb);
nf_reset_trace(skb);
+
+ if (!xnet)
+ return;
+
+ skb_orphan(skb);
+ skb->mark = 0;
}
EXPORT_SYMBOL_GPL(skb_scrub_packet);

diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index 5a4828ba05ad..a566a2e4715b 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -113,10 +113,6 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
int min_headroom;
int err;

- skb = udp_tunnel_handle_offloads(skb, csum);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
@@ -131,6 +127,10 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
if (unlikely(!skb))
return -ENOMEM;

+ skb = udp_tunnel_handle_offloads(skb, csum);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1db253e36045..d520492ba698 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2929,6 +2929,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
}
#endif

+ /* Do not fool tcpdump (if any), clean our debris */
+ skb->tstamp.tv64 = 0;
return skb;
}
EXPORT_SYMBOL(tcp_make_synack);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/