aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Lutomirski <luto@kernel.org>2024-02-09 11:01:19 -0800
committerAndy Lutomirski <luto@kernel.org>2024-02-09 11:06:58 -0800
commitd3ed76fed22d68d297a7740671c0d010f8ca205d (patch)
treef2c430405590a63b5848e86e7676f5926feef719
parentd43c1f4d6d5b25a8395bd9bfa7d6947087c6ca1b (diff)
downloadlinux-net/recverr.tar.gz
[WIP] net/ip: Consolidate RECVERR-vs-sk_err handlingnet/recverr
Linux's current userspace semantics for setting sk_err on datagram sockets are complex, and they are implemented separately, in different ways, in several files. Consolidate the logic into a new helper and use it for all IP datagram socket types. This is IMO an improvement to the comprehensibility of the code, and it also makes implementing IP_RECVERR_QUIET much more straightforward. Ideally the ip_icmp_error()/ipv6_icmp_error() call would move to the helper as well, but it would get messy due to the ability to compile IPv6 as a module. Signed-off-by: Andy Lutomirski <luto@kernel.org>
-rw-r--r--include/net/icmp.h2
-rw-r--r--include/net/ip.h29
-rw-r--r--net/ipv4/icmp.c32
-rw-r--r--net/ipv4/ping.c20
-rw-r--r--net/ipv4/udp.c18
5 files changed, 63 insertions, 38 deletions
diff --git a/include/net/icmp.h b/include/net/icmp.h
index caddf4a59ad11..fa5c1ac398056 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -22,7 +22,7 @@
struct icmp_err {
int errno;
- unsigned int fatal:1;
+ bool fatal:1;
};
extern const struct icmp_err icmp_err_convert[];
diff --git a/include/net/ip.h b/include/net/ip.h
index 1fc4c8d69e333..05ecbabaea57f 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -812,4 +812,33 @@ void ip_sock_set_recverr(struct sock *sk);
void ip_sock_set_tos(struct sock *sk, int val);
void __ip_sock_set_tos(struct sock *sk, int val);
+/*
+ * When an ICMP error is received that matches a datagram socket, some combination of
+ * queueing an IP_RECVERR-style message, setting sk_err, and signaling POLLERR needs
+ * to happen. This helper takes the struct sock and does the sk_err/sk_error_report() part.
+ *
+ * Use like this:
+ *
+ *	bool recverr = inet_test_bit(RECVERR, sk) [and account for ipv6]
+ *	int err = ...;
+ *	bool harderr; <- true if the error is 'hard', meaning that it should be reported
+ *			 for !RECVERR connected sockets
+ *	if (recverr)
+ *		ip_icmp_error() or ipv6_icmp_error() as appropriate.
+ *	ip_error_report(sk, err, harderr, recverr);
+ */
+static inline void ip_error_report(struct sock *sk, int err, bool harderr, bool recverr)
+{
+	/*
+	 * Linux sets sk_err for connected sockets if the error is 'hard' (which makes
+	 * perfect sense) and for all sockets if RECVERR[6] (which does not make very
+	 * much sense). It does indeed make sense that sk_error_report(), which signals
+	 * POLLERR, is called if RECVERR[6], though.
+	 */
+	if ((harderr && sk->sk_state == TCP_ESTABLISHED) || recverr) {
+		sk->sk_err = err;
+		sk_error_report(sk);
+	}
+}
+
#endif /* _IP_H */
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e63a3bf996176..af765f042ed16 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -116,67 +116,67 @@ struct icmp_bxm {
const struct icmp_err icmp_err_convert[] = {
{
.errno = ENETUNREACH, /* ICMP_NET_UNREACH */
- .fatal = 0,
+ .fatal = false,
},
{
.errno = EHOSTUNREACH, /* ICMP_HOST_UNREACH */
- .fatal = 0,
+ .fatal = false,
},
{
.errno = ENOPROTOOPT /* ICMP_PROT_UNREACH */,
- .fatal = 1,
+ .fatal = true,
},
{
.errno = ECONNREFUSED, /* ICMP_PORT_UNREACH */
- .fatal = 1,
+ .fatal = true,
},
{
.errno = EMSGSIZE, /* ICMP_FRAG_NEEDED */
- .fatal = 0,
+ .fatal = false,
},
{
.errno = EOPNOTSUPP, /* ICMP_SR_FAILED */
- .fatal = 0,
+ .fatal = false,
},
{
.errno = ENETUNREACH, /* ICMP_NET_UNKNOWN */
- .fatal = 1,
+ .fatal = true,
},
{
.errno = EHOSTDOWN, /* ICMP_HOST_UNKNOWN */
- .fatal = 1,
+ .fatal = true,
},
{
.errno = ENONET, /* ICMP_HOST_ISOLATED */
- .fatal = 1,
+ .fatal = true,
},
{
.errno = ENETUNREACH, /* ICMP_NET_ANO */
- .fatal = 1,
+ .fatal = true,
},
{
.errno = EHOSTUNREACH, /* ICMP_HOST_ANO */
- .fatal = 1,
+ .fatal = true,
},
{
.errno = ENETUNREACH, /* ICMP_NET_UNR_TOS */
- .fatal = 0,
+ .fatal = false,
},
{
.errno = EHOSTUNREACH, /* ICMP_HOST_UNR_TOS */
- .fatal = 0,
+ .fatal = false,
},
{
.errno = EHOSTUNREACH, /* ICMP_PKT_FILTERED */
- .fatal = 1,
+ .fatal = true,
},
{
.errno = EHOSTUNREACH, /* ICMP_PREC_VIOLATION */
- .fatal = 1,
+ .fatal = true,
},
{
.errno = EHOSTUNREACH, /* ICMP_PREC_CUTOFF */
- .fatal = 1,
+ .fatal = true,
},
};
EXPORT_SYMBOL(icmp_err_convert);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 823306487a821..4926d2aa0a168 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -495,7 +495,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
int code;
struct net *net = dev_net(skb->dev);
struct sock *sk;
- int harderr;
+ bool harderr, recverr;
int err;
if (skb->protocol == htons(ETH_P_IP)) {
@@ -529,7 +529,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
pr_debug("err on socket %p\n", sk);
err = 0;
- harderr = 0;
+ harderr = false;
inet_sock = inet_sk(sk);
if (skb->protocol == htons(ETH_P_IP)) {
@@ -546,14 +546,14 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
break;
case ICMP_PARAMETERPROB:
err = EPROTO;
- harderr = 1;
+ harderr = true;
break;
case ICMP_DEST_UNREACH:
if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
ipv4_sk_update_pmtu(skb, sk, info);
if (READ_ONCE(inet_sock->pmtudisc) != IP_PMTUDISC_DONT) {
err = EMSGSIZE;
- harderr = 1;
+ harderr = true;
break;
}
goto out;
@@ -580,11 +580,9 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
* RFC1122: OK. Passes ICMP errors back to application, as per
* 4.1.3.3.
*/
- if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) ||
- (family == AF_INET6 && !inet6_test_bit(RECVERR6, sk))) {
- if (!harderr || sk->sk_state != TCP_ESTABLISHED)
- goto out;
- } else {
+ recverr = (family == AF_INET && inet_test_bit(RECVERR, sk)) ||
+ (family == AF_INET6 && inet6_test_bit(RECVERR6, sk));
+ if (recverr) {
if (family == AF_INET) {
ip_icmp_error(sk, skb, err, 0 /* no remote port */,
info, (u8 *)icmph);
@@ -595,8 +593,8 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
#endif
}
}
- sk->sk_err = err;
- sk_error_report(sk);
+
+ ip_error_report(sk, err, harderr, recverr);
out:
return;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 89e5a806b82e9..4ba659ad8d0bb 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -706,7 +706,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
const int code = icmp_hdr(skb)->code;
bool tunnel = false;
struct sock *sk;
- int harderr;
+ bool harderr, recverr;
int err;
struct net *net = dev_net(skb->dev);
@@ -733,7 +733,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
}
err = 0;
- harderr = 0;
+ harderr = false;
inet = inet_sk(sk);
switch (type) {
@@ -745,14 +745,14 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
goto out;
case ICMP_PARAMETERPROB:
err = EPROTO;
- harderr = 1;
+ harderr = true;
break;
case ICMP_DEST_UNREACH:
if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
ipv4_sk_update_pmtu(skb, sk, info);
if (READ_ONCE(inet->pmtudisc) != IP_PMTUDISC_DONT) {
err = EMSGSIZE;
- harderr = 1;
+ harderr = true;
break;
}
goto out;
@@ -779,14 +779,12 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
(u8 *)(uh+1));
goto out;
}
- if (!inet_test_bit(RECVERR, sk)) {
- if (!harderr || sk->sk_state != TCP_ESTABLISHED)
- goto out;
- } else
+
+ recverr = inet_test_bit(RECVERR, sk);
+ if (recverr)
ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
+ ip_error_report(sk, err, harderr, recverr);
- sk->sk_err = err;
- sk_error_report(sk);
out:
return 0;
}