diff options
author | Zefan Li <lizefan@huawei.com> | 2016-10-11 01:06:56 +0800 |
---|---|---|
committer | Zefan Li <lizefan@huawei.com> | 2016-10-11 01:06:56 +0800 |
commit | 57e8a925b1089862c39363788460bbf6e9a3a93b (patch) | |
tree | 2e6454ac75750e4646a4d62eba37c40721ff20e3 | |
parent | 2741afd9367ae9b68488783ec33d03fb3869ff60 (diff) | |
download | linux-3.4.y-queue-57e8a925b1089862c39363788460bbf6e9a3a93b.tar.gz |
Add two fixes
-rw-r--r-- | patches/series | 2 | ||||
-rw-r--r-- | patches/tcp-make-challenge-acks-less-predictable.patch | 74 | ||||
-rw-r--r-- | patches/time-prevent-early-expiry-of-hrtimers-clock_realtime-at-the-leap-second-edge.patch | 271 |
3 files changed, 347 insertions, 0 deletions
diff --git a/patches/series b/patches/series index eedee77..c626b86 100644 --- a/patches/series +++ b/patches/series @@ -121,3 +121,5 @@ fix-incomplete-backport-of-commit-423f04d63cf4.patch fix-incomplete-backport-of-commit-0f792cf949a0.patch revert-usb-add-device-quirk-for-asus-t100-base-stati.patch revert-usb-add-otg-pet-device-to-tpl.patch +tcp-make-challenge-acks-less-predictable.patch +time-prevent-early-expiry-of-hrtimers-clock_realtime-at-the-leap-second-edge.patch diff --git a/patches/tcp-make-challenge-acks-less-predictable.patch b/patches/tcp-make-challenge-acks-less-predictable.patch new file mode 100644 index 0000000..bbba122 --- /dev/null +++ b/patches/tcp-make-challenge-acks-less-predictable.patch @@ -0,0 +1,74 @@ +From 75ff39ccc1bd5d3c455b6822ab09e533c551f758 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <edumazet@google.com> +Date: Sun, 10 Jul 2016 10:04:02 +0200 +Subject: tcp: make challenge acks less predictable + +commit 75ff39ccc1bd5d3c455b6822ab09e533c551f758 upstream. + +Yue Cao claims that current host rate limiting of challenge ACKS +(RFC 5961) could leak enough information to allow a patient attacker +to hijack TCP sessions. He will soon provide details in an academic +paper. + +This patch increases the default limit from 100 to 1000, and adds +some randomization so that the attacker can no longer hijack +sessions without spending a considerable amount of probes. + +Based on initial analysis and patch from Linus. + +Note that we also have per socket rate limiting, so it is tempting +to remove the host limit in the future. + +v2: randomize the count of challenge acks per second, not the period. 
+ +Fixes: 282f23c6ee34 ("tcp: implement RFC 5961 3.2") +Reported-by: Yue Cao <ycao009@ucr.edu> +Signed-off-by: Eric Dumazet <edumazet@google.com> +Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Yuchung Cheng <ycheng@google.com> +Cc: Neal Cardwell <ncardwell@google.com> +Acked-by: Neal Cardwell <ncardwell@google.com> +Acked-by: Yuchung Cheng <ycheng@google.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +[lizf: Backported to 3.4: + - adjust context + - use ACCESS_ONCE instead of WRITE_ONCE/READ_ONCE + - open-code prandom_u32_max()] +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + net/ipv4/tcp_input.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -89,7 +89,7 @@ int sysctl_tcp_adv_win_scale __read_most + EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); + + /* rfc5961 challenge ack rate limiting */ +-int sysctl_tcp_challenge_ack_limit = 100; ++int sysctl_tcp_challenge_ack_limit = 1000; + + int sysctl_tcp_stdurg __read_mostly; + int sysctl_tcp_rfc1337 __read_mostly; +@@ -3701,13 +3701,18 @@ static void tcp_send_challenge_ack(struc + /* unprotected vars, we dont care of overwrites */ + static u32 challenge_timestamp; + static unsigned int challenge_count; +- u32 now = jiffies / HZ; ++ u32 count, now = jiffies / HZ; + + if (now != challenge_timestamp) { ++ u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1; ++ + challenge_timestamp = now; +- challenge_count = 0; ++ ACCESS_ONCE(challenge_count) = half + ++ (u32)(((u64)random32() * sysctl_tcp_challenge_ack_limit) >> 32); + } +- if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { ++ count = ACCESS_ONCE(challenge_count); ++ if (count > 0) { ++ ACCESS_ONCE(challenge_count) = count - 1; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + tcp_send_ack(sk); + } diff --git a/patches/time-prevent-early-expiry-of-hrtimers-clock_realtime-at-the-leap-second-edge.patch
b/patches/time-prevent-early-expiry-of-hrtimers-clock_realtime-at-the-leap-second-edge.patch new file mode 100644 index 0000000..4944514 --- /dev/null +++ b/patches/time-prevent-early-expiry-of-hrtimers-clock_realtime-at-the-leap-second-edge.patch @@ -0,0 +1,271 @@ +From 833f32d763028c1bb371c64f457788b933773b3e Mon Sep 17 00:00:00 2001 +From: John Stultz <john.stultz@linaro.org> +Date: Thu, 11 Jun 2015 15:54:55 -0700 +Subject: time: Prevent early expiry of hrtimers[CLOCK_REALTIME] at the leap + second edge + +commit 833f32d763028c1bb371c64f457788b933773b3e upstream. + +Currently, leapsecond adjustments are done at tick time. As a result, +the leapsecond was applied at the first timer tick *after* the +leapsecond (~1-10ms late depending on HZ), rather than exactly on the +second edge. + +This was in part historical from back when we were always tick based, +but correcting this since has been avoided since it adds extra +conditional checks in the gettime fastpath, which has performance +overhead. + +However, it was recently pointed out that ABS_TIME CLOCK_REALTIME +timers set for right after the leapsecond could fire a second early, +since some timers may be expired before we trigger the timekeeping +timer, which then applies the leapsecond. + +This isn't quite as bad as it sounds, since behaviorally it is similar +to what is possible w/ ntpd made leapsecond adjustments done w/o using +the kernel discipline. Where due to latencies, timers may fire just +prior to the settimeofday call. (Also, one should note that all +applications using CLOCK_REALTIME timers should always be careful, +since they are prone to quirks from settimeofday() disturbances.) + +However, the purpose of having the kernel do the leap adjustment is to +avoid such latencies, so I think this is worth fixing.
+ +So in order to properly keep those timers from firing a second early, +this patch modifies the ntp and timekeeping logic so that we keep +enough state so that the update_base_offsets_now accessor, which +provides the hrtimer core the current time, can check and apply the +leapsecond adjustment on the second edge. This prevents the hrtimer +core from expiring timers too early. + +This patch does not modify any other time read path, so no additional +overhead is incurred. However, this also means that the leap-second +continues to be applied at tick time for all other read-paths. + +Apologies to Richard Cochran, who pushed for similar changes years +ago, which I resisted due to the concerns about the performance +overhead. + +While I suspect this isn't extremely critical, folks who care about +strict leap-second correctness will likely want to watch +this. Potentially a -stable candidate eventually. + +Originally-suggested-by: Richard Cochran <richardcochran@gmail.com> +Reported-by: Daniel Bristot de Oliveira <bristot@redhat.com> +Reported-by: Prarit Bhargava <prarit@redhat.com> +Signed-off-by: John Stultz <john.stultz@linaro.org> +Cc: Richard Cochran <richardcochran@gmail.com> +Cc: Jan Kara <jack@suse.cz> +Cc: Jiri Bohac <jbohac@suse.cz> +Cc: Shuah Khan <shuahkh@osg.samsung.com> +Cc: Ingo Molnar <mingo@kernel.org> +Link: http://lkml.kernel.org/r/1434063297-28657-4-git-send-email-john.stultz@linaro.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +[Yadi: Move do_adjtimex to timekeeping.c and solve context issues] +Signed-off-by: Hu <yadi.hu@windriver.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + kernel/time/ntp.c | 45 ++++++++++++++++++++++++++++++++++++++------- + kernel/time/timekeeping.c | 37 +++++++++++++++++++++++++++++++++++-- + 2 files changed, 73 insertions(+), 9 deletions(-) + +--- a/kernel/time/ntp.c ++++ b/kernel/time/ntp.c +@@ -34,6 +34,7 @@ unsigned long tick_nsec; + static u64 tick_length; + static u64 tick_length_base; + 
++#define SECS_PER_DAY 86400 + #define MAX_TICKADJ 500LL /* usecs */ + #define MAX_TICKADJ_SCALED \ + (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) +@@ -78,6 +79,9 @@ static long time_adjust; + /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ + static s64 ntp_tick_adj; + ++/* second value of the next pending leapsecond, or KTIME_MAX if no leap */ ++static s64 ntp_next_leap_sec = KTIME_MAX; ++ + #ifdef CONFIG_NTP_PPS + + /* +@@ -354,6 +358,8 @@ void ntp_clear(void) + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; + ++ ntp_next_leap_sec = KTIME_MAX; ++ + ntp_update_frequency(); + + tick_length = tick_length_base; +@@ -377,6 +383,21 @@ u64 ntp_tick_length(void) + return ret; + } + ++/** ++ * ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime_t ++ * ++ * Provides the time of the next leapsecond against CLOCK_REALTIME in ++ * a ktime_t format. Returns KTIME_MAX if no leapsecond is pending. ++ */ ++ktime_t ntp_get_next_leap(void) ++{ ++ ktime_t ret; ++ ++ if ((time_state == TIME_INS) && (time_status & STA_INS)) ++ return ktime_set(ntp_next_leap_sec, 0); ++ ret.tv64 = KTIME_MAX; ++ return ret; ++} + + /* + * this routine handles the overflow of the microsecond field +@@ -403,15 +424,21 @@ int second_overflow(unsigned long secs) + */ + switch (time_state) { + case TIME_OK: +- if (time_status & STA_INS) ++ if (time_status & STA_INS) { + time_state = TIME_INS; +- else if (time_status & STA_DEL) ++ ntp_next_leap_sec = secs + SECS_PER_DAY - ++ (secs % SECS_PER_DAY); ++ } else if (time_status & STA_DEL) { + time_state = TIME_DEL; ++ ntp_next_leap_sec = secs + SECS_PER_DAY - ++ ((secs+1) % SECS_PER_DAY); ++ } + break; + case TIME_INS: +- if (!(time_status & STA_INS)) ++ if (!(time_status & STA_INS)) { ++ ntp_next_leap_sec = KTIME_MAX; + time_state = TIME_OK; +- else if (secs % 86400 == 0) { ++ } else if (secs % SECS_PER_DAY == 0) { + leap = -1; + time_state = TIME_OOP; + time_tai++; +@@ 
-420,10 +447,12 @@ int second_overflow(unsigned long secs) + } + break; + case TIME_DEL: +- if (!(time_status & STA_DEL)) ++ if (!(time_status & STA_DEL)) { ++ ntp_next_leap_sec = KTIME_MAX; + time_state = TIME_OK; +- else if ((secs + 1) % 86400 == 0) { ++ } else if ((secs + 1) % SECS_PER_DAY == 0) { + leap = 1; ++ ntp_next_leap_sec = KTIME_MAX; + time_tai--; + time_state = TIME_WAIT; + printk(KERN_NOTICE +@@ -431,6 +460,7 @@ int second_overflow(unsigned long secs) + } + break; + case TIME_OOP: ++ ntp_next_leap_sec = KTIME_MAX; + time_state = TIME_WAIT; + break; + +@@ -549,6 +579,7 @@ static inline void process_adj_status(st + if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { + time_state = TIME_OK; + time_status = STA_UNSYNC; ++ ntp_next_leap_sec = KTIME_MAX; + /* restart PPS frequency calibration */ + pps_reset_freq_interval(); + } +@@ -619,7 +650,7 @@ static inline void process_adjtimex_mode + * adjtimex mainly allows reading (and writing, if superuser) of + * kernel time-keeping variables. used by xntpd. + */ +-int do_adjtimex(struct timex *txc) ++int __do_adjtimex(struct timex *txc) + { + struct timespec ts; + int result; +--- a/kernel/time/timekeeping.c ++++ b/kernel/time/timekeeping.c +@@ -21,6 +21,9 @@ + #include <linux/tick.h> + #include <linux/stop_machine.h> + ++extern ktime_t ntp_get_next_leap(void); ++extern int __do_adjtimex(struct timex *); ++ + /* Structure holding internal timekeeping values. */ + struct timekeeper { + /* Current clocksource used for timekeeping. */ +@@ -30,6 +33,8 @@ struct timekeeper { + /* The shift value of the current clocksource. */ + int shift; + ++ /* CLOCK_MONOTONIC time value of a pending leap-second*/ ++ ktime_t next_leap_ktime; + /* Number of clock cycles in one NTP interval. */ + cycle_t cycle_interval; + /* Number of clock shifted nano seconds in one NTP interval. 
*/ +@@ -186,6 +191,17 @@ static void update_rt_offset(void) + timekeeper.offs_real = timespec_to_ktime(tmp); + } + ++/* ++ * tk_update_leap_state - helper to update the next_leap_ktime ++ */ ++static inline void tk_update_leap_state(struct timekeeper *tk) ++{ ++ tk->next_leap_ktime = ntp_get_next_leap(); ++ if (tk->next_leap_ktime.tv64 != KTIME_MAX) ++ /* Convert to monotonic time */ ++ tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real); ++} ++ + /* must hold write on timekeeper.lock */ + static void timekeeping_update(bool clearntp) + { +@@ -193,6 +209,7 @@ static void timekeeping_update(bool clea + timekeeper.ntp_error = 0; + ntp_clear(); + } ++ tk_update_leap_state(&timekeeper); + update_rt_offset(); + update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic, + timekeeper.clock, timekeeper.mult); +@@ -1329,10 +1346,16 @@ ktime_t ktime_get_update_offsets(ktime_t + + *offs_real = timekeeper.offs_real; + *offs_boot = timekeeper.offs_boot; ++ ++ now = ktime_add_ns(ktime_set(secs, 0), nsecs); ++ now = ktime_sub(now, *offs_real); ++ ++ /* Handle leapsecond insertion adjustments */ ++ if (unlikely(now.tv64 >= timekeeper.next_leap_ktime.tv64)) ++ *offs_real = ktime_sub(timekeeper.offs_real, ktime_set(1, 0)); ++ + } while (read_seqretry(&timekeeper.lock, seq)); + +- now = ktime_add_ns(ktime_set(secs, 0), nsecs); +- now = ktime_sub(now, *offs_real); + return now; + } + #endif +@@ -1354,6 +1377,16 @@ ktime_t ktime_get_monotonic_offset(void) + } + EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset); + ++/* ++ * do_adjtimex() - Accessor function to NTP __do_adjtimex function ++ */ ++int do_adjtimex(struct timex *txc) ++{ ++ int ret; ++ ret = __do_adjtimex(txc); ++ tk_update_leap_state(&timekeeper); ++ return ret; ++} + + /** + * xtime_update() - advances the timekeeping infrastructure |