diff options
author | Ben Hutchings <ben@decadent.org.uk> | 2019-06-18 14:43:46 +0100 |
---|---|---|
committer | Ben Hutchings <ben@decadent.org.uk> | 2019-06-18 14:43:46 +0100 |
commit | 5cb139a10c8ff8c3e3d2b003b97d30981e27612c (patch) | |
tree | a6810031eb04d09f0f91a9994e2fcb7aa669acbe | |
parent | 67ccdddf1a73dc878130b6c4477af0dad13e9781 (diff) | |
download | linux-stable-queue-5cb139a10c8ff8c3e3d2b003b97d30981e27612c.tar.gz |
Add TCP DoS fixes
-rw-r--r-- | queue-3.16/series | 4 | ||||
-rw-r--r-- | queue-3.16/tcp-add-tcp_min_snd_mss-sysctl.patch | 118 | ||||
-rw-r--r-- | queue-3.16/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch | 39 | ||||
-rw-r--r-- | queue-3.16/tcp-limit-payload-size-of-sacked-skbs.patch | 170 | ||||
-rw-r--r-- | queue-3.16/tcp-tcp_fragment-should-apply-sane-memory-limits.patch | 74 |
5 files changed, 405 insertions, 0 deletions
diff --git a/queue-3.16/series b/queue-3.16/series index a651d2fc..ad9b449a 100644 --- a/queue-3.16/series +++ b/queue-3.16/series @@ -4,3 +4,7 @@ drivers-virt-fsl_hypervisor.c-prevent-integer-overflow-in-ioctl.patch scsi-megaraid_sas-return-error-when-create-dma-pool-failed.patch ext4-zero-out-the-unused-memory-region-in-the-extent-tree-block.patch bluetooth-hidp-fix-buffer-overflow.patch +tcp-limit-payload-size-of-sacked-skbs.patch +tcp-tcp_fragment-should-apply-sane-memory-limits.patch +tcp-add-tcp_min_snd_mss-sysctl.patch +tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch diff --git a/queue-3.16/tcp-add-tcp_min_snd_mss-sysctl.patch b/queue-3.16/tcp-add-tcp_min_snd_mss-sysctl.patch new file mode 100644 index 00000000..d4d5f9b2 --- /dev/null +++ b/queue-3.16/tcp-add-tcp_min_snd_mss-sysctl.patch @@ -0,0 +1,118 @@ +From: Eric Dumazet <edumazet@google.com> +Date: Thu, 6 Jun 2019 09:15:31 -0700 +Subject: tcp: add tcp_min_snd_mss sysctl + +commit 5f3e2bf008c2221478101ee72f5cb4654b9fc363 upstream. + +Some TCP peers announce a very small MSS option in their SYN and/or +SYN/ACK messages. + +This forces the stack to send packets with a very high network/cpu +overhead. + +Linux has enforced a minimal value of 48. Since this value includes +the size of TCP options, and that the options can consume up to 40 +bytes, this means that each segment can include only 8 bytes of payload. + +In some cases, it can be useful to increase the minimal value +to a saner value. + +We still let the default to 48 (TCP_MIN_SND_MSS), for compatibility +reasons. + +Note that TCP_MAXSEG socket option enforces a minimal value +of (TCP_MIN_MSS). David Miller increased this minimal value +in commit c39508d6f118 ("tcp: Make TCP_MAXSEG minimum more correct.") +from 64 to 88. + +We might in the future merge TCP_MIN_SND_MSS and TCP_MIN_MSS. 
+ +CVE-2019-11479 -- tcp mss hardcoded to 48 + +Signed-off-by: Eric Dumazet <edumazet@google.com> +Suggested-by: Jonathan Looney <jtl@netflix.com> +Acked-by: Neal Cardwell <ncardwell@google.com> +Cc: Yuchung Cheng <ycheng@google.com> +Cc: Tyler Hicks <tyhicks@canonical.com> +Cc: Bruce Curtis <brucec@netflix.com> +Cc: Jonathan Lemon <jonathan.lemon@gmail.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +[Salvatore Bonaccorso: Backport for context changes in 4.9.168] +[bwh: Backported to 3.16: Make the sysctl global, consistent with + net.ipv4.tcp_base_mss] +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- +--- a/Documentation/networking/ip-sysctl.txt ++++ b/Documentation/networking/ip-sysctl.txt +@@ -210,6 +210,14 @@ tcp_base_mss - INTEGER + Path MTU discovery (MTU probing). If MTU probing is enabled, + this is the initial MSS used by the connection. + ++tcp_min_snd_mss - INTEGER ++ TCP SYN and SYNACK messages usually advertise an ADVMSS option, ++ as described in RFC 1122 and RFC 6691. ++ If this ADVMSS option is smaller than tcp_min_snd_mss, ++ it is silently capped to tcp_min_snd_mss. ++ ++ Default : 48 (at least 8 bytes of payload per segment) ++ + tcp_congestion_control - STRING + Set the congestion control algorithm to be used for new + connections. 
The algorithm "reno" is always available, but +--- a/net/ipv4/sysctl_net_ipv4.c ++++ b/net/ipv4/sysctl_net_ipv4.c +@@ -34,6 +34,8 @@ static int tcp_retr1_max = 255; + static int ip_local_port_range_min[] = { 1, 1 }; + static int ip_local_port_range_max[] = { 65535, 65535 }; + static int tcp_adv_win_scale_min = -31; ++static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS; ++static int tcp_min_snd_mss_max = 65535; + static int tcp_adv_win_scale_max = 31; + static int ip_ttl_min = 1; + static int ip_ttl_max = 255; +@@ -608,6 +610,15 @@ static struct ctl_table ipv4_table[] = { + .proc_handler = proc_dointvec, + }, + { ++ .procname = "tcp_min_snd_mss", ++ .data = &sysctl_tcp_min_snd_mss, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = &tcp_min_snd_mss_min, ++ .extra2 = &tcp_min_snd_mss_max, ++ }, ++ { + .procname = "tcp_workaround_signed_windows", + .data = &sysctl_tcp_workaround_signed_windows, + .maxlen = sizeof(int), +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -61,6 +61,7 @@ int sysctl_tcp_tso_win_divisor __read_mo + + int sysctl_tcp_mtu_probing __read_mostly = 0; + int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; ++int sysctl_tcp_min_snd_mss __read_mostly = TCP_MIN_SND_MSS; + + /* By default, RFC2861 behavior. 
*/ + int sysctl_tcp_slow_start_after_idle __read_mostly = 1; +@@ -1259,8 +1260,7 @@ static inline int __tcp_mtu_to_mss(struc + mss_now -= icsk->icsk_ext_hdr_len; + + /* Then reserve room for full set of TCP options and 8 bytes of data */ +- if (mss_now < TCP_MIN_SND_MSS) +- mss_now = TCP_MIN_SND_MSS; ++ mss_now = max(mss_now, sysctl_tcp_min_snd_mss); + return mss_now; + } + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -270,6 +270,7 @@ extern int sysctl_tcp_moderate_rcvbuf; + extern int sysctl_tcp_tso_win_divisor; + extern int sysctl_tcp_mtu_probing; + extern int sysctl_tcp_base_mss; ++extern int sysctl_tcp_min_snd_mss; + extern int sysctl_tcp_workaround_signed_windows; + extern int sysctl_tcp_slow_start_after_idle; + extern int sysctl_tcp_thin_linear_timeouts; diff --git a/queue-3.16/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch b/queue-3.16/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch new file mode 100644 index 00000000..c37bfc74 --- /dev/null +++ b/queue-3.16/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch @@ -0,0 +1,39 @@ +From: Eric Dumazet <edumazet@google.com> +Date: Sat, 8 Jun 2019 10:22:49 -0700 +Subject: tcp: enforce tcp_min_snd_mss in tcp_mtu_probing() + +commit 967c05aee439e6e5d7d805e195b3a20ef5c433d6 upstream. + +If mtu probing is enabled tcp_mtu_probing() could very well end up +with a too small MSS. + +Use the new sysctl tcp_min_snd_mss to make sure MSS search +is performed in an acceptable range. + +CVE-2019-11479 -- tcp mss hardcoded to 48 + +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reported-by: Jonathan Lemon <jonathan.lemon@gmail.com> +Cc: Jonathan Looney <jtl@netflix.com> +Acked-by: Neal Cardwell <ncardwell@google.com> +Cc: Yuchung Cheng <ycheng@google.com> +Cc: Tyler Hicks <tyhicks@canonical.com> +Cc: Bruce Curtis <brucec@netflix.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +[Salvatore Bonaccorso: Backport for context changes in 4.9.168] +[bwh: Backported to 3.16: The sysctl is global] +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + net/ipv4/tcp_timer.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -113,6 +113,7 @@ static void tcp_mtu_probing(struct inet_ + mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; + mss = min(sysctl_tcp_base_mss, mss); + mss = max(mss, 68 - tp->tcp_header_len); ++ mss = max(mss, sysctl_tcp_min_snd_mss); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); + } diff --git a/queue-3.16/tcp-limit-payload-size-of-sacked-skbs.patch b/queue-3.16/tcp-limit-payload-size-of-sacked-skbs.patch new file mode 100644 index 00000000..8cda90c7 --- /dev/null +++ b/queue-3.16/tcp-limit-payload-size-of-sacked-skbs.patch @@ -0,0 +1,170 @@ +From: Eric Dumazet <edumazet@google.com> +Date: Fri, 17 May 2019 17:17:22 -0700 +Subject: tcp: limit payload size of sacked skbs + +commit 3b4929f65b0d8249f19a50245cd88ed1a2f78cff upstream. + +Jonathan Looney reported that TCP can trigger the following crash +in tcp_shifted_skb() : + + BUG_ON(tcp_skb_pcount(skb) < pcount); + +This can happen if the remote peer has advertized the smallest +MSS that linux TCP accepts : 48 + +An skb can hold 17 fragments, and each fragment can hold 32KB +on x86, or 64KB on PowerPC. + +This means that the 16bit width of TCP_SKB_CB(skb)->tcp_gso_segs +can overflow. + +Note that tcp_sendmsg() builds skbs with less than 64KB +of payload, so this problem needs SACK to be enabled. +SACK blocks allow TCP to coalesce multiple skbs in the retransmit +queue, thus filling the 17 fragments to maximal capacity.
+ +CVE-2019-11477 -- u16 overflow of TCP_SKB_CB(skb)->tcp_gso_segs + +Backport notes, provided by Joao Martins <joao.m.martins@oracle.com> + +v4.15 or since commit 737ff314563 ("tcp: use sequence distance to +detect reordering") had switched from the packet-based FACK tracking and +switched to sequence-based. + +v4.14 and older still have the old logic and hence +tcp_shift_skb_data() needs to retain its original logic and have +@fack_count in sync. In other words, we keep the increment of pcount with +tcp_skb_pcount(skb) to later use that to update fack_count. To make it +more explicit we track the new skb that gets incremented to pcount in +@next_pcount, and we get to avoid the constant invocation of +tcp_skb_pcount(skb) altogether. + +Fixes: 832d11c5cd07 ("tcp: Try to restore large SKBs while SACK processing") +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reported-by: Jonathan Looney <jtl@netflix.com> +Acked-by: Neal Cardwell <ncardwell@google.com> +Reviewed-by: Tyler Hicks <tyhicks@canonical.com> +Cc: Yuchung Cheng <ycheng@google.com> +Cc: Bruce Curtis <brucec@netflix.com> +Cc: Jonathan Lemon <jonathan.lemon@gmail.com> +Signed-off-by: David S.
Miller <davem@davemloft.net> +[Salvatore Bonaccorso: Adjust for context changes to backport to +4.9.168] +[bwh: Backported to 3.16: adjust context] +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + include/linux/tcp.h | 4 ++++ + include/net/tcp.h | 2 ++ + net/ipv4/tcp.c | 1 + + net/ipv4/tcp_input.c | 26 ++++++++++++++++++++------ + net/ipv4/tcp_output.c | 6 +++--- + 5 files changed, 30 insertions(+), 9 deletions(-) + +--- a/include/linux/tcp.h ++++ b/include/linux/tcp.h +@@ -394,4 +394,7 @@ static inline int fastopen_init_queue(st + return 0; + } + ++int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, ++ int shiftlen); ++ + #endif /* _LINUX_TCP_H */ +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -55,6 +55,8 @@ void tcp_time_wait(struct sock *sk, int + + #define MAX_TCP_HEADER (128 + MAX_HEADER) + #define MAX_TCP_OPTION_SPACE 40 ++#define TCP_MIN_SND_MSS 48 ++#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE) + + /* + * Never offer a window over 32767 without using window scaling. 
Some +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3169,6 +3169,7 @@ void __init tcp_init(void) + int max_rshare, max_wshare, cnt; + unsigned int i; + ++ BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE); + BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); + + percpu_counter_init(&tcp_sockets_allocated, 0); +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -1296,7 +1296,7 @@ static bool tcp_shifted_skb(struct sock + TCP_SKB_CB(skb)->seq += shifted; + + skb_shinfo(prev)->gso_segs += pcount; +- BUG_ON(skb_shinfo(skb)->gso_segs < pcount); ++ WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); + skb_shinfo(skb)->gso_segs -= pcount; + + /* When we're adding to gso_segs == 1, gso_size will be zero, +@@ -1362,6 +1362,21 @@ static int skb_can_shift(const struct sk + return !skb_headlen(skb) && skb_is_nonlinear(skb); + } + ++int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, ++ int pcount, int shiftlen) ++{ ++ /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE) ++ * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need ++ * to make sure not storing more than 65535 * 8 bytes per skb, ++ * even if current MSS is bigger. ++ */ ++ if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE)) ++ return 0; ++ if (unlikely(tcp_skb_pcount(to) + pcount > 65535)) ++ return 0; ++ return skb_shift(to, from, shiftlen); ++} ++ + /* Try collapsing SACK blocks spanning across multiple skbs to a single + * skb. 
+ */ +@@ -1373,6 +1388,7 @@ static struct sk_buff *tcp_shift_skb_dat + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *prev; + int mss; ++ int next_pcount; + int pcount = 0; + int len; + int in_sack; +@@ -1467,7 +1483,7 @@ static struct sk_buff *tcp_shift_skb_dat + if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) + goto fallback; + +- if (!skb_shift(prev, skb, len)) ++ if (!tcp_skb_shift(prev, skb, pcount, len)) + goto fallback; + if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) + goto out; +@@ -1486,9 +1502,10 @@ static struct sk_buff *tcp_shift_skb_dat + goto out; + + len = skb->len; +- if (skb_shift(prev, skb, len)) { +- pcount += tcp_skb_pcount(skb); +- tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0); ++ next_pcount = tcp_skb_pcount(skb); ++ if (tcp_skb_shift(prev, skb, next_pcount, len)) { ++ pcount += next_pcount; ++ tcp_shifted_skb(sk, skb, state, next_pcount, len, mss, 0); + } + + out: +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1254,8 +1254,8 @@ static inline int __tcp_mtu_to_mss(struc + mss_now -= icsk->icsk_ext_hdr_len; + + /* Then reserve room for full set of TCP options and 8 bytes of data */ +- if (mss_now < 48) +- mss_now = 48; ++ if (mss_now < TCP_MIN_SND_MSS) ++ mss_now = TCP_MIN_SND_MSS; + return mss_now; + } + diff --git a/queue-3.16/tcp-tcp_fragment-should-apply-sane-memory-limits.patch b/queue-3.16/tcp-tcp_fragment-should-apply-sane-memory-limits.patch new file mode 100644 index 00000000..ac75fd49 --- /dev/null +++ b/queue-3.16/tcp-tcp_fragment-should-apply-sane-memory-limits.patch @@ -0,0 +1,74 @@ +From: Eric Dumazet <edumazet@google.com> +Date: Sat, 18 May 2019 05:12:05 -0700 +Subject: tcp: tcp_fragment() should apply sane memory limits + +commit f070ef2ac66716357066b683fb0baf55f8191a2e upstream. + +Jonathan Looney reported that a malicious peer can force a sender +to fragment its retransmit queue into tiny skbs, inflating memory +usage and/or overflow 32bit counters. 
+ +TCP allows an application to queue up to sk_sndbuf bytes, +so we need to give some allowance for non malicious splitting +of retransmit queue. + +A new SNMP counter is added to monitor how many times TCP +did not allow to split an skb if the allowance was exceeded. + +Note that this counter might increase in the case applications +use SO_SNDBUF socket option to lower sk_sndbuf. + +CVE-2019-11478 : tcp_fragment, prevent fragmenting a packet when the + socket is already using more than half the allowed space + +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reported-by: Jonathan Looney <jtl@netflix.com> +Acked-by: Neal Cardwell <ncardwell@google.com> +Acked-by: Yuchung Cheng <ycheng@google.com> +Reviewed-by: Tyler Hicks <tyhicks@canonical.com> +Cc: Bruce Curtis <brucec@netflix.com> +Cc: Jonathan Lemon <jonathan.lemon@gmail.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +[Salvatore Bonaccorso: Adjust context for backport to 4.9.168] +[bwh: Backported to 3.16: adjust context] +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + include/uapi/linux/snmp.h | 1 + + net/ipv4/proc.c | 1 + + net/ipv4/tcp_output.c | 5 +++++ + 3 files changed, 7 insertions(+) + +--- a/include/uapi/linux/snmp.h ++++ b/include/uapi/linux/snmp.h +@@ -265,6 +265,7 @@ enum + LINUX_MIB_TCPWANTZEROWINDOWADV, /* TCPWantZeroWindowAdv */ + LINUX_MIB_TCPSYNRETRANS, /* TCPSynRetrans */ + LINUX_MIB_TCPORIGDATASENT, /* TCPOrigDataSent */ ++ LINUX_MIB_TCPWQUEUETOOBIG, /* TCPWqueueTooBig */ + __LINUX_MIB_MAX + }; + +--- a/net/ipv4/proc.c ++++ b/net/ipv4/proc.c +@@ -286,6 +286,7 @@ static const struct snmp_mib snmp4_net_l + SNMP_MIB_ITEM("TCPWantZeroWindowAdv", LINUX_MIB_TCPWANTZEROWINDOWADV), + SNMP_MIB_ITEM("TCPSynRetrans", LINUX_MIB_TCPSYNRETRANS), + SNMP_MIB_ITEM("TCPOrigDataSent", LINUX_MIB_TCPORIGDATASENT), ++ SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG), + SNMP_MIB_SENTINEL + }; + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1090,6 +1090,11 @@ int 
tcp_fragment(struct sock *sk, struct + if (nsize < 0) + nsize = 0; + ++ if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) { ++ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG); ++ return -ENOMEM; ++ } ++ + if (skb_unclone(skb, gfp)) + return -ENOMEM; + |