aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig15
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/esp4.c26
-rw-r--r--net/ipv4/ipcomp.c5
-rw-r--r--net/ipv4/ipconfig.c16
-rw-r--r--net/ipv4/ipvs/Kconfig4
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c10
-rw-r--r--net/ipv4/netfilter.c9
-rw-r--r--net/ipv4/netfilter/Kconfig2
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c3
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c97
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c3
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_probe.c6
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv4/xfrm4_mode_beet.c139
17 files changed, 233 insertions, 109 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 30af4a4dfcc82c..5572071af735ea 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -64,7 +64,7 @@ config ASK_IP_FIB_HASH
config IP_FIB_TRIE
bool "FIB_TRIE"
---help---
- Use new experimental LC-trie as FIB lookup algoritm.
+ Use new experimental LC-trie as FIB lookup algorithm.
This improves lookup performance if you have a large
number of routes.
@@ -434,6 +434,15 @@ config INET_XFRM_MODE_TUNNEL
If unsure, say Y.
+config INET_XFRM_MODE_BEET
+ tristate "IP: IPsec BEET mode"
+ default y
+ select XFRM
+ ---help---
+ Support for IPsec BEET mode.
+
+ If unsure, say Y.
+
config INET_DIAG
tristate "INET: socket monitoring interface"
default y
@@ -526,7 +535,7 @@ config TCP_CONG_HYBLA
---help---
TCP-Hybla is a sender-side only change that eliminates penalization of
long-RTT, large-bandwidth connections, like when satellite legs are
- involved, expecially when sharing a common bottleneck with normal
+ involved, especially when sharing a common bottleneck with normal
terrestrial connections.
config TCP_CONG_VEGAS
@@ -556,7 +565,7 @@ config TCP_CONG_LP
default n
---help---
TCP Low Priority (TCP-LP), a distributed algorithm whose goal is
- to utiliza only the excess network bandwidth as compared to the
+ to utilize only the excess network bandwidth as compared to the
``fair share`` of bandwidth as targeted by TCP.
See http://www-ece.rice.edu/networks/TCP-LP/
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f66049e28aebba..15645c51520cc4 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_INET_AH) += ah4.o
obj-$(CONFIG_INET_ESP) += esp4.o
obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o
+obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o
obj-$(CONFIG_INET_TUNNEL) += tunnel4.o
obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index cfe5c84742865c..cfb5d3de9c848c 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1,4 +1,4 @@
-/* linux/net/inet/arp.c
+/* linux/net/ipv4/arp.c
*
* Version: $Id: arp.c,v 1.99 2001/08/30 22:55:42 davem Exp $
*
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 13b29360d102f1..b5c205b5766982 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -253,7 +253,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
* as per draft-ietf-ipsec-udp-encaps-06,
* section 3.1.2
*/
- if (x->props.mode == XFRM_MODE_TRANSPORT)
+ if (x->props.mode == XFRM_MODE_TRANSPORT ||
+ x->props.mode == XFRM_MODE_BEET)
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
@@ -271,17 +272,28 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
-
- if (x->props.mode == XFRM_MODE_TUNNEL) {
- mtu = ALIGN(mtu + 2, blksize);
- } else {
- /* The worst case. */
+ int enclen = 0;
+
+ switch (x->props.mode) {
+ case XFRM_MODE_TUNNEL:
+ mtu = ALIGN(mtu +2, blksize);
+ break;
+ default:
+ case XFRM_MODE_TRANSPORT:
+ /* The worst case */
mtu = ALIGN(mtu + 2, 4) + blksize - 4;
+ break;
+ case XFRM_MODE_BEET:
+ /* The worst case. */
+ enclen = IPV4_BEET_PHMAXLEN;
+ mtu = ALIGN(mtu + enclen + 2, blksize);
+ break;
}
+
if (esp->conf.padlen)
mtu = ALIGN(mtu, esp->conf.padlen);
- return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+ return mtu + x->props.header_len + esp->auth.icv_trunc_len - enclen;
}
static void esp4_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 2017d36024d48d..3839b706142e38 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -206,6 +206,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
{
struct xfrm_state *t;
+ u8 mode = XFRM_MODE_TUNNEL;
t = xfrm_state_alloc();
if (t == NULL)
@@ -216,7 +217,9 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
t->id.daddr.a4 = x->id.daddr.a4;
memcpy(&t->sel, &x->sel, sizeof(t->sel));
t->props.family = AF_INET;
- t->props.mode = XFRM_MODE_TUNNEL;
+ if (x->props.mode == XFRM_MODE_BEET)
+ mode = x->props.mode;
+ t->props.mode = mode;
t->props.saddr.a4 = x->props.saddr.a4;
t->props.flags = x->props.flags;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 1fbb38415b193e..f8ce8475915966 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -366,7 +366,7 @@ static int __init ic_defaults(void)
*/
if (!ic_host_name_set)
- sprintf(system_utsname.nodename, "%u.%u.%u.%u", NIPQUAD(ic_myaddr));
+ sprintf(init_utsname()->nodename, "%u.%u.%u.%u", NIPQUAD(ic_myaddr));
if (root_server_addr == INADDR_NONE)
root_server_addr = ic_servaddr;
@@ -805,7 +805,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
}
break;
case 12: /* Host name */
- ic_bootp_string(system_utsname.nodename, ext+1, *ext, __NEW_UTS_LEN);
+ ic_bootp_string(utsname()->nodename, ext+1, *ext, __NEW_UTS_LEN);
ic_host_name_set = 1;
break;
case 15: /* Domain name (DNS) */
@@ -816,7 +816,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path));
break;
case 40: /* NIS Domain name (_not_ DNS) */
- ic_bootp_string(system_utsname.domainname, ext+1, *ext, __NEW_UTS_LEN);
+ ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN);
break;
}
}
@@ -1368,7 +1368,7 @@ static int __init ip_auto_config(void)
printk(", mask=%u.%u.%u.%u", NIPQUAD(ic_netmask));
printk(", gw=%u.%u.%u.%u", NIPQUAD(ic_gateway));
printk(",\n host=%s, domain=%s, nis-domain=%s",
- system_utsname.nodename, ic_domain, system_utsname.domainname);
+ utsname()->nodename, ic_domain, utsname()->domainname);
printk(",\n bootserver=%u.%u.%u.%u", NIPQUAD(ic_servaddr));
printk(", rootserver=%u.%u.%u.%u", NIPQUAD(root_server_addr));
printk(", rootpath=%s", root_server_path);
@@ -1478,11 +1478,11 @@ static int __init ip_auto_config_setup(char *addrs)
case 4:
if ((dp = strchr(ip, '.'))) {
*dp++ = '\0';
- strlcpy(system_utsname.domainname, dp,
- sizeof(system_utsname.domainname));
+ strlcpy(utsname()->domainname, dp,
+ sizeof(utsname()->domainname));
}
- strlcpy(system_utsname.nodename, ip,
- sizeof(system_utsname.nodename));
+ strlcpy(utsname()->nodename, ip,
+ sizeof(utsname()->nodename));
ic_host_name_set = 1;
break;
case 5:
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
index c9820bfc493a25..891b9355cf9666 100644
--- a/net/ipv4/ipvs/Kconfig
+++ b/net/ipv4/ipvs/Kconfig
@@ -81,7 +81,7 @@ config IP_VS_PROTO_ESP
bool "ESP load balancing support"
depends on IP_VS
---help---
- This option enables support for load balancing ESP (Encapsultion
+ This option enables support for load balancing ESP (Encapsulation
Security Payload) transport protocol. Say Y if unsure.
config IP_VS_PROTO_AH
@@ -204,7 +204,7 @@ config IP_VS_SED
connections to the server with the shortest expected delay. The
expected delay that the job will experience is (Ci + 1) / Ui if
sent to the ith server, in which Ci is the number of connections
- on the the ith server and Ui is the fixed service rate (weight)
+ on the ith server and Ui is the fixed service rate (weight)
of the ith server.
If you want to compile it in kernel, say Y. To compile it as a
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 6dee03935f7860..1445bb47fea4bc 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -813,6 +813,16 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
skb->nh.iph->saddr = cp->vaddr;
ip_send_check(skb->nh.iph);
+ /* For policy routing, packets originating from this
+ * machine itself may be routed differently to packets
+ * passing through. We want this packet to be routed as
+ * if it came from this machine itself. So re-compute
+ * the routing information.
+ */
+ if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
+ goto drop;
+ skb = *pskb;
+
IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
ip_vs_out_stats(cp, skb);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 5ac15379a0cfd1..e2005c6810a4e3 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -8,7 +8,7 @@
#include <net/ip.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct sk_buff **pskb)
+int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
{
struct iphdr *iph = (*pskb)->nh.iph;
struct rtable *rt;
@@ -16,10 +16,13 @@ int ip_route_me_harder(struct sk_buff **pskb)
struct dst_entry *odst;
unsigned int hh_len;
+ if (addr_type == RTN_UNSPEC)
+ addr_type = inet_addr_type(iph->saddr);
+
/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
* packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
*/
- if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
+ if (addr_type == RTN_LOCAL) {
fl.nl_u.ip4_u.daddr = iph->daddr;
fl.nl_u.ip4_u.saddr = iph->saddr;
fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
@@ -156,7 +159,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
if (!(iph->tos == rt_info->tos
&& iph->daddr == rt_info->daddr
&& iph->saddr == rt_info->saddr))
- return ip_route_me_harder(pskb);
+ return ip_route_me_harder(pskb, RTN_UNSPEC);
}
return 0;
}
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index a55b8ff70ded91..d88c292f118c68 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -373,7 +373,7 @@ config IP_NF_TARGET_ULOG
daemon using netlink multicast sockets; unlike the LOG target
which can only be viewed through syslog.
- The apropriate userspace logging daemon (ulogd) may be obtained from
+ The appropriate userspace logging daemon (ulogd) may be obtained from
<http://www.gnumonks.org/projects/ulogd/>
To compile it as a module, choose M here. If unsure, say N.
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 021395b674639b..d85d2de504497d 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -265,7 +265,8 @@ ip_nat_local_fn(unsigned int hooknum,
ct->tuplehash[!dir].tuple.src.u.all
#endif
)
- return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+ if (ip_route_me_harder(pskb, RTN_UNSPEC))
+ ret = NF_DROP;
}
return ret;
}
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index fd0c05efed8a08..ad0312d0e4fd6a 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -38,76 +38,16 @@ MODULE_DESCRIPTION("iptables REJECT target module");
#define DEBUGP(format, args...)
#endif
-static inline struct rtable *route_reverse(struct sk_buff *skb,
- struct tcphdr *tcph, int hook)
-{
- struct iphdr *iph = skb->nh.iph;
- struct dst_entry *odst;
- struct flowi fl = {};
- struct rtable *rt;
-
- /* We don't require ip forwarding to be enabled to be able to
- * send a RST reply for bridged traffic. */
- if (hook != NF_IP_FORWARD
-#ifdef CONFIG_BRIDGE_NETFILTER
- || (skb->nf_bridge && skb->nf_bridge->mask & BRNF_BRIDGED)
-#endif
- ) {
- fl.nl_u.ip4_u.daddr = iph->saddr;
- if (hook == NF_IP_LOCAL_IN)
- fl.nl_u.ip4_u.saddr = iph->daddr;
- fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
-
- if (ip_route_output_key(&rt, &fl) != 0)
- return NULL;
- } else {
- /* non-local src, find valid iif to satisfy
- * rp-filter when calling ip_route_input. */
- fl.nl_u.ip4_u.daddr = iph->daddr;
- if (ip_route_output_key(&rt, &fl) != 0)
- return NULL;
-
- odst = skb->dst;
- if (ip_route_input(skb, iph->saddr, iph->daddr,
- RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
- dst_release(&rt->u.dst);
- return NULL;
- }
- dst_release(&rt->u.dst);
- rt = (struct rtable *)skb->dst;
- skb->dst = odst;
-
- fl.nl_u.ip4_u.daddr = iph->saddr;
- fl.nl_u.ip4_u.saddr = iph->daddr;
- fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
- }
-
- if (rt->u.dst.error) {
- dst_release(&rt->u.dst);
- return NULL;
- }
-
- fl.proto = IPPROTO_TCP;
- fl.fl_ip_sport = tcph->dest;
- fl.fl_ip_dport = tcph->source;
- security_skb_classify_flow(skb, &fl);
-
- xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
-
- return rt;
-}
-
/* Send RST reply */
static void send_reset(struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
struct iphdr *iph = oldskb->nh.iph;
struct tcphdr _otcph, *oth, *tcph;
- struct rtable *rt;
__be16 tmp_port;
__be32 tmp_addr;
int needs_ack;
- int hh_len;
+ unsigned int addr_type;
/* IP header checks: fragment. */
if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
@@ -126,23 +66,13 @@ static void send_reset(struct sk_buff *oldskb, int hook)
if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP))
return;
- if ((rt = route_reverse(oldskb, oth, hook)) == NULL)
- return;
-
- hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
-
/* We need a linear, writeable skb. We also need to expand
headroom in case hh_len of incoming interface < hh_len of
outgoing interface */
- nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb),
+ nskb = skb_copy_expand(oldskb, LL_MAX_HEADER, skb_tailroom(oldskb),
GFP_ATOMIC);
- if (!nskb) {
- dst_release(&rt->u.dst);
+ if (!nskb)
return;
- }
-
- dst_release(nskb->dst);
- nskb->dst = &rt->u.dst;
/* This packet will not be the same as the other: clear nf fields */
nf_reset(nskb);
@@ -184,6 +114,21 @@ static void send_reset(struct sk_buff *oldskb, int hook)
tcph->window = 0;
tcph->urg_ptr = 0;
+ /* Set DF, id = 0 */
+ nskb->nh.iph->frag_off = htons(IP_DF);
+ nskb->nh.iph->id = 0;
+
+ addr_type = RTN_UNSPEC;
+ if (hook != NF_IP_FORWARD
+#ifdef CONFIG_BRIDGE_NETFILTER
+ || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED)
+#endif
+ )
+ addr_type = RTN_LOCAL;
+
+ if (ip_route_me_harder(&nskb, addr_type))
+ goto free_nskb;
+
/* Adjust TCP checksum */
nskb->ip_summed = CHECKSUM_NONE;
tcph->check = 0;
@@ -192,12 +137,8 @@ static void send_reset(struct sk_buff *oldskb, int hook)
nskb->nh.iph->daddr,
csum_partial((char *)tcph,
sizeof(struct tcphdr), 0));
-
- /* Adjust IP TTL, DF */
+ /* Adjust IP TTL */
nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
- /* Set DF, id = 0 */
- nskb->nh.iph->frag_off = htons(IP_DF);
- nskb->nh.iph->id = 0;
/* Adjust IP checksum */
nskb->nh.iph->check = 0;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index e62ea2bb9c0ac4..b91f3582359bf3 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -157,7 +157,8 @@ ipt_local_hook(unsigned int hook,
|| (*pskb)->nfmark != nfmark
#endif
|| (*pskb)->nh.iph->tos != tos))
- return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+ if (ip_route_me_harder(pskb, RTN_UNSPEC))
+ ret = NF_DROP;
return ret;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3f884cea14ff43..cf06accbe687a9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2259,7 +2259,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
u32 pkts_acked = 0;
void (*rtt_sample)(struct sock *sk, u32 usrtt)
= icsk->icsk_ca_ops->rtt_sample;
- struct timeval tv;
+ struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
while ((skb = skb_peek(&sk->sk_write_queue)) &&
skb != sk->sk_send_head) {
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index dab37d2f65fcf0..4be336f17883ca 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -99,8 +99,10 @@ static int jtcp_sendmsg(struct kiocb *iocb, struct sock *sk,
}
static struct jprobe tcp_send_probe = {
- .kp = { .addr = (kprobe_opcode_t *) &tcp_sendmsg, },
- .entry = (kprobe_opcode_t *) &jtcp_sendmsg,
+ .kp = {
+ .symbol_name = "tcp_sendmsg",
+ },
+ .entry = JPROBE_ENTRY(jtcp_sendmsg),
};
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6d6142f9c478ba..865d75214a9ab1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -675,6 +675,8 @@ do_append_data:
udp_flush_pending_frames(sk);
else if (!corkreq)
err = udp_push_pending_frames(sk, up);
+ else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
+ up->pending = 0;
release_sock(sk);
out:
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
new file mode 100644
index 00000000000000..89cf59ea7bbe7a
--- /dev/null
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -0,0 +1,139 @@
+/*
+ * xfrm4_mode_beet.c - BEET mode encapsulation for IPv4.
+ *
+ * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
+ * Miika Komu <miika@iki.fi>
+ * Herbert Xu <herbert@gondor.apana.org.au>
+ * Abhinav Pathak <abhinav.pathak@hiit.fi>
+ * Jeff Ahrenholz <ahrenholz@gmail.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/dst.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+
+/* Add encapsulation header.
+ *
+ * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
+ * The following fields in it shall be filled in by x->type->output:
+ * tot_len
+ * check
+ *
+ * On exit, skb->h will be set to the start of the payload to be processed
+ * by x->type->output and skb->nh will be set to the top IP header.
+ */
+static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct iphdr *iph, *top_iph = NULL;
+ int hdrlen, optlen;
+
+ iph = skb->nh.iph;
+ skb->h.ipiph = iph;
+
+ hdrlen = 0;
+ optlen = iph->ihl * 4 - sizeof(*iph);
+ if (unlikely(optlen))
+ hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
+
+ skb->nh.raw = skb_push(skb, x->props.header_len + hdrlen);
+ top_iph = skb->nh.iph;
+ hdrlen = iph->ihl * 4 - optlen;
+ skb->h.raw += hdrlen;
+
+ memmove(top_iph, iph, hdrlen);
+ if (unlikely(optlen)) {
+ struct ip_beet_phdr *ph;
+
+ BUG_ON(optlen < 0);
+
+ ph = (struct ip_beet_phdr *)skb->h.raw;
+ ph->padlen = 4 - (optlen & 4);
+ ph->hdrlen = (optlen + ph->padlen + sizeof(*ph)) / 8;
+ ph->nexthdr = top_iph->protocol;
+
+ top_iph->protocol = IPPROTO_BEETPH;
+ top_iph->ihl = sizeof(struct iphdr) / 4;
+ }
+
+ top_iph->saddr = x->props.saddr.a4;
+ top_iph->daddr = x->id.daddr.a4;
+
+ return 0;
+}
+
+static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct iphdr *iph = skb->nh.iph;
+ int phlen = 0;
+ int optlen = 0;
+ __u8 ph_nexthdr = 0, protocol = 0;
+ int err = -EINVAL;
+
+ protocol = iph->protocol;
+
+ if (unlikely(iph->protocol == IPPROTO_BEETPH)) {
+ struct ip_beet_phdr *ph = (struct ip_beet_phdr*)(iph + 1);
+
+ if (!pskb_may_pull(skb, sizeof(*ph)))
+ goto out;
+
+ phlen = ph->hdrlen * 8;
+ optlen = phlen - ph->padlen - sizeof(*ph);
+ if (optlen < 0 || optlen & 3 || optlen > 250)
+ goto out;
+
+ if (!pskb_may_pull(skb, phlen))
+ goto out;
+
+ ph_nexthdr = ph->nexthdr;
+ }
+
+ skb_push(skb, sizeof(*iph) - phlen + optlen);
+ memmove(skb->data, skb->nh.raw, sizeof(*iph));
+ skb->nh.raw = skb->data;
+
+ iph = skb->nh.iph;
+ iph->ihl = (sizeof(*iph) + optlen) / 4;
+ iph->tot_len = htons(skb->len);
+ iph->daddr = x->sel.daddr.a4;
+ iph->saddr = x->sel.saddr.a4;
+ if (ph_nexthdr)
+ iph->protocol = ph_nexthdr;
+ else
+ iph->protocol = protocol;
+ iph->check = 0;
+ iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+ err = 0;
+out:
+ return err;
+}
+
+static struct xfrm_mode xfrm4_beet_mode = {
+ .input = xfrm4_beet_input,
+ .output = xfrm4_beet_output,
+ .owner = THIS_MODULE,
+ .encap = XFRM_MODE_BEET,
+};
+
+static int __init xfrm4_beet_init(void)
+{
+ return xfrm_register_mode(&xfrm4_beet_mode, AF_INET);
+}
+
+static void __exit xfrm4_beet_exit(void)
+{
+ int err;
+
+ err = xfrm_unregister_mode(&xfrm4_beet_mode, AF_INET);
+ BUG_ON(err);
+}
+
+module_init(xfrm4_beet_init);
+module_exit(xfrm4_beet_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_BEET);