diff options
author | Andy Lutomirski <luto@kernel.org> | 2024-02-09 11:01:19 -0800 |
---|---|---|
committer | Andy Lutomirski <luto@kernel.org> | 2024-02-09 11:06:58 -0800 |
commit | d3ed76fed22d68d297a7740671c0d010f8ca205d (patch) | |
tree | f2c430405590a63b5848e86e7676f5926feef719 | |
parent | d43c1f4d6d5b25a8395bd9bfa7d6947087c6ca1b (diff) | |
download | linux-net/recverr.tar.gz |
[WIP] net/ip: Consolidate RECVERR-vs-sk_err handling (net/recverr)
Linux's current userspace semantics for setting sk_err on datagram
sockets are complex, and they are implemented separately, in different ways,
in several files.
Consolidate the logic into a new helper and use it for all IP datagram
socket types.
This is IMO an improvement to the comprehensibility of the code, and
it also makes implementing IP_RECVERR_QUIET much more straightforward.
Ideally the ip_icmp_error()/ipv6_icmp_error() call would move to the
helper as well, but it would get messy due to the ability to compile
IPv6 as a module.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
-rw-r--r-- | include/net/icmp.h | 2 | ||||
-rw-r--r-- | include/net/ip.h | 29 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 32 | ||||
-rw-r--r-- | net/ipv4/ping.c | 20 | ||||
-rw-r--r-- | net/ipv4/udp.c | 18 |
5 files changed, 63 insertions, 38 deletions
diff --git a/include/net/icmp.h b/include/net/icmp.h index caddf4a59ad11..fa5c1ac398056 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -22,7 +22,7 @@ struct icmp_err { int errno; - unsigned int fatal:1; + bool fatal:1; }; extern const struct icmp_err icmp_err_convert[]; diff --git a/include/net/ip.h b/include/net/ip.h index 1fc4c8d69e333..05ecbabaea57f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -812,4 +812,33 @@ void ip_sock_set_recverr(struct sock *sk); void ip_sock_set_tos(struct sock *sk, int val); void __ip_sock_set_tos(struct sock *sk, int val); +/* + * When an ICMP error is received that matches a datagram socket, some combination of + * queueing an IP_RECVERR-style message, setting sk_err, and signaling POLLERR needs + * to happen. + * + * Use like this: + * + * bool recverr = inet_test_bit(RECVERR, sk) [and account for ipv6] + * int err = ...; + * bool harderr; <- true if the error is 'hard', meaning that it should be reported + * for !RECVERR connected sockets + * if (recverr) + * ip_icmp_error() or ipv6_icmp_error() as appropriate. + * ip_error_report(sk, err, harderr, recverr); + */ +static inline bool ip_error_report(struct socket *sk, int err, bool harderr, bool recverr) +{ + /* + * Linux sets sk_err for connected sockets if the error is 'hard' (which makes + * perfect sense) and for all sockets if RECVERR[6] (which does not make very + * much sense). It does indeed make sense that sk_error_report(), which signals + * POLLERR, is called if RECVERR[6], though. 
+ */ + if ((harderr && sk->sk_state == TCP_ESTABLISHED) || recverr) { + sk->sk_err = err; + sk_error_report(sk); + } +} + #endif /* _IP_H */ diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e63a3bf996176..af765f042ed16 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -116,67 +116,67 @@ struct icmp_bxm { const struct icmp_err icmp_err_convert[] = { { .errno = ENETUNREACH, /* ICMP_NET_UNREACH */ - .fatal = 0, + .fatal = false, }, { .errno = EHOSTUNREACH, /* ICMP_HOST_UNREACH */ - .fatal = 0, + .fatal = false, }, { .errno = ENOPROTOOPT /* ICMP_PROT_UNREACH */, - .fatal = 1, + .fatal = true, }, { .errno = ECONNREFUSED, /* ICMP_PORT_UNREACH */ - .fatal = 1, + .fatal = true, }, { .errno = EMSGSIZE, /* ICMP_FRAG_NEEDED */ - .fatal = 0, + .fatal = false, }, { .errno = EOPNOTSUPP, /* ICMP_SR_FAILED */ - .fatal = 0, + .fatal = false, }, { .errno = ENETUNREACH, /* ICMP_NET_UNKNOWN */ - .fatal = 1, + .fatal = true, }, { .errno = EHOSTDOWN, /* ICMP_HOST_UNKNOWN */ - .fatal = 1, + .fatal = true, }, { .errno = ENONET, /* ICMP_HOST_ISOLATED */ - .fatal = 1, + .fatal = true, }, { .errno = ENETUNREACH, /* ICMP_NET_ANO */ - .fatal = 1, + .fatal = true, }, { .errno = EHOSTUNREACH, /* ICMP_HOST_ANO */ - .fatal = 1, + .fatal = true, }, { .errno = ENETUNREACH, /* ICMP_NET_UNR_TOS */ - .fatal = 0, + .fatal = false, }, { .errno = EHOSTUNREACH, /* ICMP_HOST_UNR_TOS */ - .fatal = 0, + .fatal = false, }, { .errno = EHOSTUNREACH, /* ICMP_PKT_FILTERED */ - .fatal = 1, + .fatal = true, }, { .errno = EHOSTUNREACH, /* ICMP_PREC_VIOLATION */ - .fatal = 1, + .fatal = true, }, { .errno = EHOSTUNREACH, /* ICMP_PREC_CUTOFF */ - .fatal = 1, + .fatal = true, }, }; EXPORT_SYMBOL(icmp_err_convert); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 823306487a821..4926d2aa0a168 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -495,7 +495,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) int code; struct net *net = dev_net(skb->dev); struct sock *sk; - int harderr; + bool harderr, 
recverr; int err; if (skb->protocol == htons(ETH_P_IP)) { @@ -529,7 +529,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) pr_debug("err on socket %p\n", sk); err = 0; - harderr = 0; + harderr = false; inet_sock = inet_sk(sk); if (skb->protocol == htons(ETH_P_IP)) { @@ -546,14 +546,14 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) break; case ICMP_PARAMETERPROB: err = EPROTO; - harderr = 1; + harderr = true; break; case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ ipv4_sk_update_pmtu(skb, sk, info); if (READ_ONCE(inet_sock->pmtudisc) != IP_PMTUDISC_DONT) { err = EMSGSIZE; - harderr = 1; + harderr = true; break; } goto out; @@ -580,11 +580,9 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) * RFC1122: OK. Passes ICMP errors back to application, as per * 4.1.3.3. */ - if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) || - (family == AF_INET6 && !inet6_test_bit(RECVERR6, sk))) { - if (!harderr || sk->sk_state != TCP_ESTABLISHED) - goto out; - } else { + bool recverr = (family == AF_INET && inet_test_bit(RECVERR, sk)) || + (family == AF_INET6 && inet6_test_bit(RECVERR6, sk)); + if (recverr) { if (family == AF_INET) { ip_icmp_error(sk, skb, err, 0 /* no remote port */, info, (u8 *)icmph); @@ -595,8 +593,8 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) #endif } } - sk->sk_err = err; - sk_error_report(sk); + + ip_error_report(sk, err, harderr, recverr); out: return; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 89e5a806b82e9..4ba659ad8d0bb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -706,7 +706,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) const int code = icmp_hdr(skb)->code; bool tunnel = false; struct sock *sk; - int harderr; + bool harderr, recverr; int err; struct net *net = dev_net(skb->dev); @@ -733,7 +733,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) } err = 0; - harderr = 0; + harderr = false; 
inet = inet_sk(sk); switch (type) { @@ -745,14 +745,14 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) goto out; case ICMP_PARAMETERPROB: err = EPROTO; - harderr = 1; + harderr = true; break; case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ ipv4_sk_update_pmtu(skb, sk, info); if (READ_ONCE(inet->pmtudisc) != IP_PMTUDISC_DONT) { err = EMSGSIZE; - harderr = 1; + harderr = true; break; } goto out; @@ -779,14 +779,12 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) (u8 *)(uh+1)); goto out; } - if (!inet_test_bit(RECVERR, sk)) { - if (!harderr || sk->sk_state != TCP_ESTABLISHED) - goto out; - } else + + recverr = inet_test_bit(RECVERR, sk); + if (recverr) ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); + ip_error_report(sk, err, harderr, recverr); - sk->sk_err = err; - sk_error_report(sk); out: return 0; } |