diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2019-07-02 06:52:43 +0200 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2019-07-02 06:52:43 +0200 |
commit | f696b93c7cebf5dba80f64f051165ce6efe6f5eb (patch) | |
tree | 5e5a3e267e9626daee4c69e04dec5864ebb61322 | |
parent | a3a89fb7895cac660bc427b169b1f5cd58026791 (diff) | |
download | queue-3.18-f696b93c7cebf5dba80f64f051165ce6efe6f5eb.tar.gz |
more patches
8 files changed, 418 insertions, 40 deletions
diff --git a/af_packet-block-execution-of-tasks-waiting-for-transmit-to-complete-in-af_packet.patch b/af_packet-block-execution-of-tasks-waiting-for-transmit-to-complete-in-af_packet.patch new file mode 100644 index 0000000..43fbb2b --- /dev/null +++ b/af_packet-block-execution-of-tasks-waiting-for-transmit-to-complete-in-af_packet.patch @@ -0,0 +1,153 @@ +From foo@baz Tue 02 Jul 2019 06:32:24 AM CEST +From: Neil Horman <nhorman@tuxdriver.com> +Date: Tue, 25 Jun 2019 17:57:49 -0400 +Subject: af_packet: Block execution of tasks waiting for transmit to complete in AF_PACKET + +From: Neil Horman <nhorman@tuxdriver.com> + +[ Upstream commit 89ed5b519004a7706f50b70f611edbd3aaacff2c ] + +When an application is run that: +a) Sets its scheduler to be SCHED_FIFO +and +b) Opens a memory mapped AF_PACKET socket, and sends frames with the +MSG_DONTWAIT flag cleared, its possible for the application to hang +forever in the kernel. This occurs because when waiting, the code in +tpacket_snd calls schedule, which under normal circumstances allows +other tasks to run, including ksoftirqd, which in some cases is +responsible for freeing the transmitted skb (which in AF_PACKET calls a +destructor that flips the status bit of the transmitted frame back to +available, allowing the transmitting task to complete). + +However, when the calling application is SCHED_FIFO, its priority is +such that the schedule call immediately places the task back on the cpu, +preventing ksoftirqd from freeing the skb, which in turn prevents the +transmitting task from detecting that the transmission is complete. + +We can fix this by converting the schedule call to a completion +mechanism. By using a completion queue, we force the calling task, when +it detects there are no more frames to send, to schedule itself off the +cpu until such time as the last transmitted skb is freed, allowing +forward progress to be made. 
+ +Tested by myself and the reporter, with good results + +Change Notes: + +V1->V2: + Enhance the sleep logic to support being interruptible and +allowing for honoring SK_SNDTIMEO (Willem de Bruijn) + +V2->V3: + Rearrange the point at which we wait for the completion queue, to +avoid needing to check for ph/skb being null at the end of the loop. +Also move the complete call to the skb destructor to avoid needing to +modify __packet_set_status. Also gate calling complete on +packet_read_pending returning zero to avoid multiple calls to complete. +(Willem de Bruijn) + + Move timeo computation within loop, to re-fetch the socket +timeout since we also use the timeo variable to record the return code +from the wait_for_completion call (Neil Horman) + +V3->V4: + Willem has requested that the control flow be restored to the +previous state. Doing so lets us eliminate the need for the +po->wait_on_complete flag variable, and lets us get rid of the +packet_next_frame function, but introduces another complexity. +Specifically, by using the packet pending count, if an +application calls sendmsg multiple times with MSG_DONTWAIT set, each +set of transmitted frames, when complete, will cause +tpacket_destruct_skb to issue a complete call, for which there will +never be a wait_for_completion call. This imbalance will cause any +future call to wait_for_completion here to return early, when the frames +they sent may not have completed. To correct this, we need to re-init +the completion queue on every call to tpacket_snd before we enter the +loop so as to ensure we wait properly for the frames we send in this +iteration. 
+ + Change the timeout and interrupted gotos to out_put rather than +out_status so that we don't try to free a non-existant skb + Clean up some extra newlines (Willem de Bruijn) + +Reviewed-by: Willem de Bruijn <willemb@google.com> +Signed-off-by: Neil Horman <nhorman@tuxdriver.com> +Reported-by: Matteo Croce <mcroce@redhat.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/packet/af_packet.c | 20 +++++++++++++++++--- + net/packet/internal.h | 1 + + 2 files changed, 18 insertions(+), 3 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2108,6 +2108,9 @@ static void tpacket_destruct_skb(struct + + ts = __packet_set_timestamp(po, ph, skb); + __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); ++ ++ if (!packet_read_pending(&po->tx_ring)) ++ complete(&po->skb_completion); + } + + sock_wfree(skb); +@@ -2250,7 +2253,7 @@ static int tpacket_fill_skb(struct packe + + static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) + { +- struct sk_buff *skb; ++ struct sk_buff *skb = NULL; + struct net_device *dev; + __be16 proto; + int err, reserve = 0; +@@ -2262,6 +2265,7 @@ static int tpacket_snd(struct packet_soc + int len_sum = 0; + int status = TP_STATUS_AVAILABLE; + int hlen, tlen; ++ long timeo = 0; + + mutex_lock(&po->pg_vec_lock); + +@@ -2302,12 +2306,21 @@ static int tpacket_snd(struct packet_soc + if (size_max > dev->mtu + reserve + VLAN_HLEN) + size_max = dev->mtu + reserve + VLAN_HLEN; + ++ reinit_completion(&po->skb_completion); ++ + do { + ph = packet_current_frame(po, &po->tx_ring, + TP_STATUS_SEND_REQUEST); + if (unlikely(ph == NULL)) { +- if (need_wait && need_resched()) +- schedule(); ++ if (need_wait && skb) { ++ timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT); ++ timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo); ++ if (timeo <= 0) { ++ err = !timeo ? 
-ETIMEDOUT : -ERESTARTSYS; ++ goto out_put; ++ } ++ } ++ /* check for additional frames */ + continue; + } + +@@ -2856,6 +2869,7 @@ static int packet_create(struct net *net + sock_init_data(sock, sk); + + po = pkt_sk(sk); ++ init_completion(&po->skb_completion); + sk->sk_family = PF_PACKET; + po->num = proto; + po->xmit = dev_queue_xmit; +--- a/net/packet/internal.h ++++ b/net/packet/internal.h +@@ -114,6 +114,7 @@ struct packet_sock { + unsigned int tp_hdrlen; + unsigned int tp_reserve; + unsigned int tp_tstamp; ++ struct completion skb_completion; + struct net_device __rcu *cached_dev; + int (*xmit)(struct sk_buff *skb); + struct packet_type prot_hook ____cacheline_aligned_in_smp; diff --git a/bonding-always-enable-vlan-tx-offload.patch b/bonding-always-enable-vlan-tx-offload.patch new file mode 100644 index 0000000..36d05e3 --- /dev/null +++ b/bonding-always-enable-vlan-tx-offload.patch @@ -0,0 +1,51 @@ +From foo@baz Tue 02 Jul 2019 06:20:09 AM CEST +From: YueHaibing <yuehaibing@huawei.com> +Date: Wed, 26 Jun 2019 16:08:44 +0800 +Subject: bonding: Always enable vlan tx offload + +From: YueHaibing <yuehaibing@huawei.com> + +[ Upstream commit 30d8177e8ac776d89d387fad547af6a0f599210e ] + +We build vlan on top of bonding interface, which vlan offload +is off, bond mode is 802.3ad (LACP) and xmit_hash_policy is +BOND_XMIT_POLICY_ENCAP34. + +Because vlan tx offload is off, vlan tci is cleared and skb push +the vlan header in validate_xmit_vlan() while sending from vlan +devices. Then in bond_xmit_hash, __skb_flow_dissect() fails to +get information from protocol headers encapsulated within vlan, +because 'nhoff' is points to IP header, so bond hashing is based +on layer 2 info, which fails to distribute packets across slaves. + +This patch always enable bonding's vlan tx offload, pass the vlan +packets to the slave devices with vlan tci, let them to handle +vlan implementation. 
+ +Fixes: 278339a42a1b ("bonding: propogate vlan_features to bonding master") +Suggested-by: Jiri Pirko <jiri@resnulli.us> +Signed-off-by: YueHaibing <yuehaibing@huawei.com> +Acked-by: Jiri Pirko <jiri@mellanox.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/net/bonding/bond_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -4039,13 +4039,13 @@ void bond_setup(struct net_device *bond_ + bond_dev->features |= NETIF_F_NETNS_LOCAL; + + bond_dev->hw_features = BOND_VLAN_FEATURES | +- NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; + + bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); + bond_dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + bond_dev->features |= bond_dev->hw_features; ++ bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX; + } + + /* Destroy a bonding device. diff --git a/fs-binfmt_flat.c-make-load_flat_shared_library-work.patch b/fs-binfmt_flat.c-make-load_flat_shared_library-work.patch new file mode 100644 index 0000000..d3240f0 --- /dev/null +++ b/fs-binfmt_flat.c-make-load_flat_shared_library-work.patch @@ -0,0 +1,86 @@ +From 867bfa4a5fcee66f2b25639acae718e8b28b25a5 Mon Sep 17 00:00:00 2001 +From: Jann Horn <jannh@google.com> +Date: Fri, 28 Jun 2019 12:06:46 -0700 +Subject: fs/binfmt_flat.c: make load_flat_shared_library() work + +From: Jann Horn <jannh@google.com> + +commit 867bfa4a5fcee66f2b25639acae718e8b28b25a5 upstream. + +load_flat_shared_library() is broken: It only calls load_flat_file() if +prepare_binprm() returns zero, but prepare_binprm() returns the number of +bytes read - so this only happens if the file is empty. + +Instead, call into load_flat_file() if the number of bytes read is +non-negative. 
(Even if the number of bytes is zero - in that case, +load_flat_file() will see nullbytes and return a nice -ENOEXEC.) + +In addition, remove the code related to bprm creds and stop using +prepare_binprm() - this code is loading a library, not a main executable, +and it only actually uses the members "buf", "file" and "filename" of the +linux_binprm struct. Instead, call kernel_read() directly. + +Link: http://lkml.kernel.org/r/20190524201817.16509-1-jannh@google.com +Fixes: 287980e49ffc ("remove lots of IS_ERR_VALUE abuses") +Signed-off-by: Jann Horn <jannh@google.com> +Cc: Alexander Viro <viro@zeniv.linux.org.uk> +Cc: Kees Cook <keescook@chromium.org> +Cc: Nicolas Pitre <nicolas.pitre@linaro.org> +Cc: Arnd Bergmann <arnd@arndb.de> +Cc: Geert Uytterhoeven <geert@linux-m68k.org> +Cc: Russell King <linux@armlinux.org.uk> +Cc: Greg Ungerer <gerg@linux-m68k.org> +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> + +--- + fs/binfmt_flat.c | 24 +++++++----------------- + 1 file changed, 7 insertions(+), 17 deletions(-) + +--- a/fs/binfmt_flat.c ++++ b/fs/binfmt_flat.c +@@ -808,9 +808,14 @@ err: + + static int load_flat_shared_library(int id, struct lib_info *libs) + { ++ /* ++ * This is a fake bprm struct; only the members "buf", "file" and ++ * "filename" are actually used. ++ */ + struct linux_binprm bprm; + int res; + char buf[16]; ++ loff_t pos = 0; + + memset(&bprm, 0, sizeof(bprm)); + +@@ -824,25 +829,10 @@ static int load_flat_shared_library(int + if (IS_ERR(bprm.file)) + return res; + +- bprm.cred = prepare_exec_creds(); +- res = -ENOMEM; +- if (!bprm.cred) +- goto out; +- +- /* We don't really care about recalculating credentials at this point +- * as we're past the point of no return and are dealing with shared +- * libraries. 
+- */ +- bprm.cred_prepared = 1; +- +- res = prepare_binprm(&bprm); +- +- if (!IS_ERR_VALUE(res)) ++ res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos); ++ if (res >= 0) + res = load_flat_file(&bprm, libs, id, NULL); + +- abort_creds(bprm.cred); +- +-out: + allow_write_access(bprm.file); + fput(bprm.file); + diff --git a/ipv4-use-return-value-of-inet_iif-for-__raw_v4_lookup-in-the-while-loop.patch b/ipv4-use-return-value-of-inet_iif-for-__raw_v4_lookup-in-the-while-loop.patch new file mode 100644 index 0000000..d3107a7 --- /dev/null +++ b/ipv4-use-return-value-of-inet_iif-for-__raw_v4_lookup-in-the-while-loop.patch @@ -0,0 +1,34 @@ +From foo@baz Tue 02 Jul 2019 06:26:14 AM CEST +From: Stephen Suryaputra <ssuryaextr@gmail.com> +Date: Mon, 24 Jun 2019 20:14:06 -0400 +Subject: ipv4: Use return value of inet_iif() for __raw_v4_lookup in the while loop + +From: Stephen Suryaputra <ssuryaextr@gmail.com> + +[ Upstream commit 38c73529de13e1e10914de7030b659a2f8b01c3b ] + +In commit 19e4e768064a8 ("ipv4: Fix raw socket lookup for local +traffic"), the dif argument to __raw_v4_lookup() is coming from the +returned value of inet_iif() but the change was done only for the first +lookup. Subsequent lookups in the while loop still use skb->dev->ifindex. + +Fixes: 19e4e768064a8 ("ipv4: Fix raw socket lookup for local traffic") +Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com> +Reviewed-by: David Ahern <dsahern@gmail.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/ipv4/raw.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/raw.c ++++ b/net/ipv4/raw.c +@@ -186,7 +186,7 @@ static int raw_v4_input(struct sk_buff * + } + sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, + iph->saddr, iph->daddr, +- skb->dev->ifindex); ++ dif); + } + out: + read_unlock(&raw_v4_hashinfo.lock); diff --git a/sctp-change-to-hold-sk-after-auth-shkey-is-created-successfully.patch b/sctp-change-to-hold-sk-after-auth-shkey-is-created-successfully.patch new file mode 100644 index 0000000..7bd5fe6 --- /dev/null +++ b/sctp-change-to-hold-sk-after-auth-shkey-is-created-successfully.patch @@ -0,0 +1,51 @@ +From foo@baz Tue 02 Jul 2019 06:37:32 AM CEST +From: Xin Long <lucien.xin@gmail.com> +Date: Tue, 25 Jun 2019 00:21:45 +0800 +Subject: sctp: change to hold sk after auth shkey is created successfully + +From: Xin Long <lucien.xin@gmail.com> + +[ Upstream commit 25bff6d5478b2a02368097015b7d8eb727c87e16 ] + +Now in sctp_endpoint_init(), it holds the sk then creates auth +shkey. But when the creation fails, it doesn't release the sk, +which causes a sk refcnt leak. + +Fix it by only holding the sk when auth shkey is created +successfully. + +Fixes: a29a5bd4f5c3 ("[SCTP]: Implement SCTP-AUTH initializations.") +Reported-by: syzbot+afabda3890cc2f765041@syzkaller.appspotmail.com +Reported-by: syzbot+276ca1c77a19977c0130@syzkaller.appspotmail.com +Signed-off-by: Xin Long <lucien.xin@gmail.com> +Acked-by: Neil Horman <nhorman@redhat.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/sctp/endpointola.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/sctp/endpointola.c ++++ b/net/sctp/endpointola.c +@@ -126,10 +126,6 @@ static struct sctp_endpoint *sctp_endpoi + /* Initialize the bind addr area */ + sctp_bind_addr_init(&ep->base.bind_addr, 0); + +- /* Remember who we are attached to. */ +- ep->base.sk = sk; +- sock_hold(ep->base.sk); +- + /* Create the lists of associations. */ + INIT_LIST_HEAD(&ep->asocs); + +@@ -165,6 +161,10 @@ static struct sctp_endpoint *sctp_endpoi + ep->auth_hmacs_list = auth_hmacs; + ep->auth_chunk_list = auth_chunks; + ++ /* Remember who we are attached to. */ ++ ep->base.sk = sk; ++ sock_hold(ep->base.sk); ++ + return ep; + + nomem_hmacs: @@ -1,4 +1,4 @@ -tcp-refine-memory-limit-test-in-tcp_fragment.patch +fs-binfmt_flat.c-make-load_flat_shared_library-work.patch tracing-silence-gcc-9-array-bounds-warning.patch input-uinput-add-compat-ioctl-number-translation-for-ui_-_ff_upload.patch apparmor-enforce-nullbyte-at-end-of-tag-string.patch @@ -11,3 +11,8 @@ smb3-retry-on-status_insufficient_resources-instead-of-failing-write.patch cfg80211-fix-memory-leak-of-wiphy-device-name.patch perf-help-remove-needless-use-of-strncpy.patch abort-file_remove_privs-for-non-reg.-files.patch +sctp-change-to-hold-sk-after-auth-shkey-is-created-successfully.patch +team-always-enable-vlan-tx-offload.patch +ipv4-use-return-value-of-inet_iif-for-__raw_v4_lookup-in-the-while-loop.patch +bonding-always-enable-vlan-tx-offload.patch +af_packet-block-execution-of-tasks-waiting-for-transmit-to-complete-in-af_packet.patch diff --git a/tcp-refine-memory-limit-test-in-tcp_fragment.patch b/tcp-refine-memory-limit-test-in-tcp_fragment.patch deleted file mode 100644 index 51d1d0f..0000000 --- a/tcp-refine-memory-limit-test-in-tcp_fragment.patch +++ /dev/null @@ -1,39 +0,0 @@ -From b6653b3629e5b88202be3c9abc44713973f5c4b4 Mon 
Sep 17 00:00:00 2001 -From: Eric Dumazet <edumazet@google.com> -Date: Fri, 21 Jun 2019 06:09:55 -0700 -Subject: tcp: refine memory limit test in tcp_fragment() - -From: Eric Dumazet <edumazet@google.com> - -commit b6653b3629e5b88202be3c9abc44713973f5c4b4 upstream. - -tcp_fragment() might be called for skbs in the write queue. - -Memory limits might have been exceeded because tcp_sendmsg() only -checks limits at full skb (64KB) boundaries. - -Therefore, we need to make sure tcp_fragment() wont punish applications -that might have setup very low SO_SNDBUF values. - -Fixes: f070ef2ac667 ("tcp: tcp_fragment() should apply sane memory limits") -Signed-off-by: Eric Dumazet <edumazet@google.com> -Reported-by: Christoph Paasch <cpaasch@apple.com> -Tested-by: Christoph Paasch <cpaasch@apple.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> - ---- - net/ipv4/tcp_output.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/net/ipv4/tcp_output.c -+++ b/net/ipv4/tcp_output.c -@@ -1152,7 +1152,7 @@ int tcp_fragment(struct sock *sk, struct - if (nsize < 0) - nsize = 0; - -- if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) { -+ if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf + 0x20000)) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG); - return -ENOMEM; - } diff --git a/team-always-enable-vlan-tx-offload.patch b/team-always-enable-vlan-tx-offload.patch new file mode 100644 index 0000000..c7e765d --- /dev/null +++ b/team-always-enable-vlan-tx-offload.patch @@ -0,0 +1,37 @@ +From foo@baz Tue 02 Jul 2019 06:20:09 AM CEST +From: YueHaibing <yuehaibing@huawei.com> +Date: Thu, 27 Jun 2019 00:03:39 +0800 +Subject: team: Always enable vlan tx offload + +From: YueHaibing <yuehaibing@huawei.com> + +[ Upstream commit ee4297420d56a0033a8593e80b33fcc93fda8509 ] + +We should rather have vlan_tci filled all the way down +to the transmitting netdevice and let it do the hw/sw +vlan 
implementation. + +Suggested-by: Jiri Pirko <jiri@resnulli.us> +Signed-off-by: YueHaibing <yuehaibing@huawei.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/net/team/team.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -2079,12 +2079,12 @@ static void team_setup(struct net_device + dev->features |= NETIF_F_NETNS_LOCAL; + + dev->hw_features = TEAM_VLAN_FEATURES | +- NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; + + dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); + dev->features |= dev->hw_features; ++ dev->features |= NETIF_F_HW_VLAN_CTAG_TX; + } + + static int team_newlink(struct net *src_net, struct net_device *dev, |