authorBen Hutchings <ben@decadent.org.uk>2019-06-18 14:43:46 +0100
committerBen Hutchings <ben@decadent.org.uk>2019-06-18 14:43:46 +0100
commit5cb139a10c8ff8c3e3d2b003b97d30981e27612c (patch)
treea6810031eb04d09f0f91a9994e2fcb7aa669acbe
parent67ccdddf1a73dc878130b6c4477af0dad13e9781 (diff)
downloadlinux-stable-queue-5cb139a10c8ff8c3e3d2b003b97d30981e27612c.tar.gz
Add TCP DoS fixes
-rw-r--r--queue-3.16/series4
-rw-r--r--queue-3.16/tcp-add-tcp_min_snd_mss-sysctl.patch118
-rw-r--r--queue-3.16/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch39
-rw-r--r--queue-3.16/tcp-limit-payload-size-of-sacked-skbs.patch170
-rw-r--r--queue-3.16/tcp-tcp_fragment-should-apply-sane-memory-limits.patch74
5 files changed, 405 insertions, 0 deletions
diff --git a/queue-3.16/series b/queue-3.16/series
index a651d2fc..ad9b449a 100644
--- a/queue-3.16/series
+++ b/queue-3.16/series
@@ -4,3 +4,7 @@ drivers-virt-fsl_hypervisor.c-prevent-integer-overflow-in-ioctl.patch
scsi-megaraid_sas-return-error-when-create-dma-pool-failed.patch
ext4-zero-out-the-unused-memory-region-in-the-extent-tree-block.patch
bluetooth-hidp-fix-buffer-overflow.patch
+tcp-limit-payload-size-of-sacked-skbs.patch
+tcp-tcp_fragment-should-apply-sane-memory-limits.patch
+tcp-add-tcp_min_snd_mss-sysctl.patch
+tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch
diff --git a/queue-3.16/tcp-add-tcp_min_snd_mss-sysctl.patch b/queue-3.16/tcp-add-tcp_min_snd_mss-sysctl.patch
new file mode 100644
index 00000000..d4d5f9b2
--- /dev/null
+++ b/queue-3.16/tcp-add-tcp_min_snd_mss-sysctl.patch
@@ -0,0 +1,118 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 6 Jun 2019 09:15:31 -0700
+Subject: tcp: add tcp_min_snd_mss sysctl
+
+commit 5f3e2bf008c2221478101ee72f5cb4654b9fc363 upstream.
+
+Some TCP peers announce a very small MSS option in their SYN and/or
+SYN/ACK messages.
+
+This forces the stack to send packets with a very high network/cpu
+overhead.
+
+Linux has enforced a minimal value of 48. Since this value includes
+the size of TCP options, and the options can consume up to 40
+bytes, this means that each segment can include only 8 bytes of payload.
+
+In some cases, it can be useful to increase this minimum
+to a saner value.
+
+We still leave the default at 48 (TCP_MIN_SND_MSS) for compatibility
+reasons.
+
+Note that the TCP_MAXSEG socket option enforces a minimal value
+of TCP_MIN_MSS. David Miller increased this minimal value
+in commit c39508d6f118 ("tcp: Make TCP_MAXSEG minimum more correct.")
+from 64 to 88.
+
+We might in the future merge TCP_MIN_SND_MSS and TCP_MIN_MSS.
+
+CVE-2019-11479 -- tcp mss hardcoded to 48
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Suggested-by: Jonathan Looney <jtl@netflix.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Cc: Bruce Curtis <brucec@netflix.com>
+Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[Salvatore Bonaccorso: Backport for context changes in 4.9.168]
+[bwh: Backported to 3.16: Make the sysctl global, consistent with
+ net.ipv4.tcp_base_mss]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+--- a/Documentation/networking/ip-sysctl.txt
++++ b/Documentation/networking/ip-sysctl.txt
+@@ -210,6 +210,14 @@ tcp_base_mss - INTEGER
+ Path MTU discovery (MTU probing). If MTU probing is enabled,
+ this is the initial MSS used by the connection.
+
++tcp_min_snd_mss - INTEGER
++ TCP SYN and SYNACK messages usually advertise an ADVMSS option,
++ as described in RFC 1122 and RFC 6691.
++ If this ADVMSS option is smaller than tcp_min_snd_mss,
++ it is silently capped to tcp_min_snd_mss.
++
++ Default : 48 (at least 8 bytes of payload per segment)
++
+ tcp_congestion_control - STRING
+ Set the congestion control algorithm to be used for new
+ connections. The algorithm "reno" is always available, but
+--- a/net/ipv4/sysctl_net_ipv4.c
++++ b/net/ipv4/sysctl_net_ipv4.c
+@@ -34,6 +34,8 @@ static int tcp_retr1_max = 255;
+ static int ip_local_port_range_min[] = { 1, 1 };
+ static int ip_local_port_range_max[] = { 65535, 65535 };
+ static int tcp_adv_win_scale_min = -31;
++static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
++static int tcp_min_snd_mss_max = 65535;
+ static int tcp_adv_win_scale_max = 31;
+ static int ip_ttl_min = 1;
+ static int ip_ttl_max = 255;
+@@ -608,6 +610,15 @@ static struct ctl_table ipv4_table[] = {
+ .proc_handler = proc_dointvec,
+ },
+ {
++ .procname = "tcp_min_snd_mss",
++ .data = &sysctl_tcp_min_snd_mss,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec_minmax,
++ .extra1 = &tcp_min_snd_mss_min,
++ .extra2 = &tcp_min_snd_mss_max,
++ },
++ {
+ .procname = "tcp_workaround_signed_windows",
+ .data = &sysctl_tcp_workaround_signed_windows,
+ .maxlen = sizeof(int),
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -61,6 +61,7 @@ int sysctl_tcp_tso_win_divisor __read_mo
+
+ int sysctl_tcp_mtu_probing __read_mostly = 0;
+ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
++int sysctl_tcp_min_snd_mss __read_mostly = TCP_MIN_SND_MSS;
+
+ /* By default, RFC2861 behavior. */
+ int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
+@@ -1259,8 +1260,7 @@ static inline int __tcp_mtu_to_mss(struc
+ mss_now -= icsk->icsk_ext_hdr_len;
+
+ /* Then reserve room for full set of TCP options and 8 bytes of data */
+- if (mss_now < TCP_MIN_SND_MSS)
+- mss_now = TCP_MIN_SND_MSS;
++ mss_now = max(mss_now, sysctl_tcp_min_snd_mss);
+ return mss_now;
+ }
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -270,6 +270,7 @@ extern int sysctl_tcp_moderate_rcvbuf;
+ extern int sysctl_tcp_tso_win_divisor;
+ extern int sysctl_tcp_mtu_probing;
+ extern int sysctl_tcp_base_mss;
++extern int sysctl_tcp_min_snd_mss;
+ extern int sysctl_tcp_workaround_signed_windows;
+ extern int sysctl_tcp_slow_start_after_idle;
+ extern int sysctl_tcp_thin_linear_timeouts;
diff --git a/queue-3.16/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch b/queue-3.16/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch
new file mode 100644
index 00000000..c37bfc74
--- /dev/null
+++ b/queue-3.16/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch
@@ -0,0 +1,39 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 8 Jun 2019 10:22:49 -0700
+Subject: tcp: enforce tcp_min_snd_mss in tcp_mtu_probing()
+
+commit 967c05aee439e6e5d7d805e195b3a20ef5c433d6 upstream.
+
+If MTU probing is enabled, tcp_mtu_probing() could very well end up
+with too small an MSS.
+
+Use the new sysctl tcp_min_snd_mss to make sure the MSS search
+is performed in an acceptable range.
+
+CVE-2019-11479 -- tcp mss hardcoded to 48
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Jonathan Lemon <jonathan.lemon@gmail.com>
+Cc: Jonathan Looney <jtl@netflix.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Cc: Bruce Curtis <brucec@netflix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[Salvatore Bonaccorso: Backport for context changes in 4.9.168]
+[bwh: Backported to 3.16: The sysctl is global]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ net/ipv4/tcp_timer.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -113,6 +113,7 @@ static void tcp_mtu_probing(struct inet_
+ mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
+ mss = min(sysctl_tcp_base_mss, mss);
+ mss = max(mss, 68 - tp->tcp_header_len);
++ mss = max(mss, sysctl_tcp_min_snd_mss);
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
+ }
diff --git a/queue-3.16/tcp-limit-payload-size-of-sacked-skbs.patch b/queue-3.16/tcp-limit-payload-size-of-sacked-skbs.patch
new file mode 100644
index 00000000..8cda90c7
--- /dev/null
+++ b/queue-3.16/tcp-limit-payload-size-of-sacked-skbs.patch
@@ -0,0 +1,170 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 17 May 2019 17:17:22 -0700
+Subject: tcp: limit payload size of sacked skbs
+
+commit 3b4929f65b0d8249f19a50245cd88ed1a2f78cff upstream.
+
+Jonathan Looney reported that TCP can trigger the following crash
+in tcp_shifted_skb() :
+
+ BUG_ON(tcp_skb_pcount(skb) < pcount);
+
+This can happen if the remote peer has advertised the smallest
+MSS that Linux TCP accepts: 48
+
+An skb can hold 17 fragments, and each fragment can hold 32KB
+on x86, or 64KB on PowerPC.
+
+This means that the 16-bit width of TCP_SKB_CB(skb)->tcp_gso_segs
+can overflow.
+
+Note that tcp_sendmsg() builds skbs with less than 64KB
+of payload, so this problem needs SACK to be enabled.
+SACK blocks allow TCP to coalesce multiple skbs in the retransmit
+queue, thus filling the 17 fragments to maximal capacity.
+
+CVE-2019-11477 -- u16 overflow of TCP_SKB_CB(skb)->tcp_gso_segs
+
+Backport notes, provided by Joao Martins <joao.m.martins@oracle.com>
+
+v4.15 and later, since commit 737ff314563 ("tcp: use sequence distance to
+detect reordering"), switched from packet-based FACK tracking to
+sequence-based tracking.
+
+v4.14 and older still have the old logic, hence
+tcp_shift_skb_data() needs to retain its original logic and keep
+@fack_count in sync. In other words, we keep the increment of pcount by
+tcp_skb_pcount(skb) so that it can later be used to update fack_count. To
+make this more explicit we track the pcount of the shifted skb in
+@next_pcount, which also lets us avoid repeated invocations of
+tcp_skb_pcount(skb).
+
+Fixes: 832d11c5cd07 ("tcp: Try to restore large SKBs while SACK processing")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Jonathan Looney <jtl@netflix.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Bruce Curtis <brucec@netflix.com>
+Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[Salvatore Bonaccorso: Adjust for context changes to backport to
+4.9.168]
+[bwh: Backported to 3.16: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ include/linux/tcp.h | 4 ++++
+ include/net/tcp.h | 2 ++
+ net/ipv4/tcp.c | 1 +
+ net/ipv4/tcp_input.c | 26 ++++++++++++++++++++------
+ net/ipv4/tcp_output.c | 6 +++---
+ 5 files changed, 30 insertions(+), 9 deletions(-)
+
+--- a/include/linux/tcp.h
++++ b/include/linux/tcp.h
+@@ -394,4 +394,7 @@ static inline int fastopen_init_queue(st
+ return 0;
+ }
+
++int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
++ int shiftlen);
++
+ #endif /* _LINUX_TCP_H */
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -55,6 +55,8 @@ void tcp_time_wait(struct sock *sk, int
+
+ #define MAX_TCP_HEADER (128 + MAX_HEADER)
+ #define MAX_TCP_OPTION_SPACE 40
++#define TCP_MIN_SND_MSS 48
++#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
+
+ /*
+ * Never offer a window over 32767 without using window scaling. Some
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3169,6 +3169,7 @@ void __init tcp_init(void)
+ int max_rshare, max_wshare, cnt;
+ unsigned int i;
+
++ BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
+ BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
+
+ percpu_counter_init(&tcp_sockets_allocated, 0);
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1296,7 +1296,7 @@ static bool tcp_shifted_skb(struct sock
+ TCP_SKB_CB(skb)->seq += shifted;
+
+ skb_shinfo(prev)->gso_segs += pcount;
+- BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
++ WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
+ skb_shinfo(skb)->gso_segs -= pcount;
+
+ /* When we're adding to gso_segs == 1, gso_size will be zero,
+@@ -1362,6 +1362,21 @@ static int skb_can_shift(const struct sk
+ return !skb_headlen(skb) && skb_is_nonlinear(skb);
+ }
+
++int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
++ int pcount, int shiftlen)
++{
++ /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
++ * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
++ * to make sure not storing more than 65535 * 8 bytes per skb,
++ * even if current MSS is bigger.
++ */
++ if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
++ return 0;
++ if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
++ return 0;
++ return skb_shift(to, from, shiftlen);
++}
++
+ /* Try collapsing SACK blocks spanning across multiple skbs to a single
+ * skb.
+ */
+@@ -1373,6 +1388,7 @@ static struct sk_buff *tcp_shift_skb_dat
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct sk_buff *prev;
+ int mss;
++ int next_pcount;
+ int pcount = 0;
+ int len;
+ int in_sack;
+@@ -1467,7 +1483,7 @@ static struct sk_buff *tcp_shift_skb_dat
+ if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
+ goto fallback;
+
+- if (!skb_shift(prev, skb, len))
++ if (!tcp_skb_shift(prev, skb, pcount, len))
+ goto fallback;
+ if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+ goto out;
+@@ -1486,9 +1502,10 @@ static struct sk_buff *tcp_shift_skb_dat
+ goto out;
+
+ len = skb->len;
+- if (skb_shift(prev, skb, len)) {
+- pcount += tcp_skb_pcount(skb);
+- tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
++ next_pcount = tcp_skb_pcount(skb);
++ if (tcp_skb_shift(prev, skb, next_pcount, len)) {
++ pcount += next_pcount;
++ tcp_shifted_skb(sk, skb, state, next_pcount, len, mss, 0);
+ }
+
+ out:
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1254,8 +1254,8 @@ static inline int __tcp_mtu_to_mss(struc
+ mss_now -= icsk->icsk_ext_hdr_len;
+
+ /* Then reserve room for full set of TCP options and 8 bytes of data */
+- if (mss_now < 48)
+- mss_now = 48;
++ if (mss_now < TCP_MIN_SND_MSS)
++ mss_now = TCP_MIN_SND_MSS;
+ return mss_now;
+ }
+
diff --git a/queue-3.16/tcp-tcp_fragment-should-apply-sane-memory-limits.patch b/queue-3.16/tcp-tcp_fragment-should-apply-sane-memory-limits.patch
new file mode 100644
index 00000000..ac75fd49
--- /dev/null
+++ b/queue-3.16/tcp-tcp_fragment-should-apply-sane-memory-limits.patch
@@ -0,0 +1,74 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 18 May 2019 05:12:05 -0700
+Subject: tcp: tcp_fragment() should apply sane memory limits
+
+commit f070ef2ac66716357066b683fb0baf55f8191a2e upstream.
+
+Jonathan Looney reported that a malicious peer can force a sender
+to fragment its retransmit queue into tiny skbs, inflating memory
+usage and/or overflowing 32-bit counters.
+
+TCP allows an application to queue up to sk_sndbuf bytes,
+so we need to give some allowance for non-malicious splitting
+of the retransmit queue.
+
+A new SNMP counter is added to monitor how many times TCP
+did not allow an skb to be split because the allowance was exceeded.
+
+Note that this counter might increase in cases where applications
+use the SO_SNDBUF socket option to lower sk_sndbuf.
+
+CVE-2019-11478 : tcp_fragment, prevent fragmenting a packet when the
+ socket is already using more than half the allowed space
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Jonathan Looney <jtl@netflix.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
+Cc: Bruce Curtis <brucec@netflix.com>
+Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[Salvatore Bonaccorso: Adjust context for backport to 4.9.168]
+[bwh: Backported to 3.16: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ include/uapi/linux/snmp.h | 1 +
+ net/ipv4/proc.c | 1 +
+ net/ipv4/tcp_output.c | 5 +++++
+ 3 files changed, 7 insertions(+)
+
+--- a/include/uapi/linux/snmp.h
++++ b/include/uapi/linux/snmp.h
+@@ -265,6 +265,7 @@ enum
+ LINUX_MIB_TCPWANTZEROWINDOWADV, /* TCPWantZeroWindowAdv */
+ LINUX_MIB_TCPSYNRETRANS, /* TCPSynRetrans */
+ LINUX_MIB_TCPORIGDATASENT, /* TCPOrigDataSent */
++ LINUX_MIB_TCPWQUEUETOOBIG, /* TCPWqueueTooBig */
+ __LINUX_MIB_MAX
+ };
+
+--- a/net/ipv4/proc.c
++++ b/net/ipv4/proc.c
+@@ -286,6 +286,7 @@ static const struct snmp_mib snmp4_net_l
+ SNMP_MIB_ITEM("TCPWantZeroWindowAdv", LINUX_MIB_TCPWANTZEROWINDOWADV),
+ SNMP_MIB_ITEM("TCPSynRetrans", LINUX_MIB_TCPSYNRETRANS),
+ SNMP_MIB_ITEM("TCPOrigDataSent", LINUX_MIB_TCPORIGDATASENT),
++ SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
+ SNMP_MIB_SENTINEL
+ };
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1090,6 +1090,11 @@ int tcp_fragment(struct sock *sk, struct
+ if (nsize < 0)
+ nsize = 0;
+
++ if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
++ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
++ return -ENOMEM;
++ }
++
+ if (skb_unclone(skb, gfp))
+ return -ENOMEM;
+