From f1da70632fa0875f80fc60991a010c31f40983ff Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 2 Oct 2006 16:10:47 -0700
Subject: [NETFILTER]: Kconfig: fix xt_physdev dependencies

xt_physdev depends on bridge netfilter, which is a boolean, but can still
be built modular because of special handling in the bridge makefile. Add
a dependency on BRIDGE to prevent XT_MATCH_PHYSDEV=y, BRIDGE=m.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 0a28d2c5c44fc..ce94732b8e231 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -365,7 +365,7 @@ config NETFILTER_XT_MATCH_MULTIPORT
 
 config NETFILTER_XT_MATCH_PHYSDEV
 	tristate '"physdev" match support'
-	depends on NETFILTER_XTABLES && BRIDGE_NETFILTER
+	depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER
 	help
 	  Physdev packet matching matches against the physical bridge ports
 	  the IP packet arrived on or will leave by.
-- 
cgit 1.2.3-korg


From b4c4ed175ff0ee816df48571cfa9b73f521964b6 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Mon, 2 Oct 2006 16:11:13 -0700
Subject: [NETFILTER]: add type parameter to ip_route_me_harder

By adding a type parameter to ip_route_me_harder() the
expensive call to inet_addr_type() can be avoided in some cases.
A followup patch where ip_route_me_harder() is called from within
ip_vs_out() is one such example.

Signed-off-By: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4.h         | 2 +-
 net/ipv4/netfilter.c                   | 9 ++++++---
 net/ipv4/netfilter/ip_nat_standalone.c | 3 ++-
 net/ipv4/netfilter/iptable_mangle.c    | 3 ++-
 4 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index ce02c984f3bae..5b63a231a76bb 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -77,7 +77,7 @@ enum nf_ip_hook_priorities {
 #define SO_ORIGINAL_DST 80
 
 #ifdef __KERNEL__
-extern int ip_route_me_harder(struct sk_buff **pskb);
+extern int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type);
 extern int ip_xfrm_me_harder(struct sk_buff **pskb);
 extern unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 				   unsigned int dataoff, u_int8_t protocol);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 5ac15379a0cfd..e2005c6810a4e 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -8,7 +8,7 @@
 #include <net/ip.h>
 
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct sk_buff **pskb)
+int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
 {
 	struct iphdr *iph = (*pskb)->nh.iph;
 	struct rtable *rt;
@@ -16,10 +16,13 @@ int ip_route_me_harder(struct sk_buff **pskb)
 	struct dst_entry *odst;
 	unsigned int hh_len;
 
+	if (addr_type == RTN_UNSPEC)
+		addr_type = inet_addr_type(iph->saddr);
+
 	/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
 	 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
 	 */
-	if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
+	if (addr_type == RTN_LOCAL) {
 		fl.nl_u.ip4_u.daddr = iph->daddr;
 		fl.nl_u.ip4_u.saddr = iph->saddr;
 		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
@@ -156,7 +159,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
 		if (!(iph->tos == rt_info->tos
 		      && iph->daddr == rt_info->daddr
 		      && iph->saddr == rt_info->saddr))
-			return ip_route_me_harder(pskb);
+			return ip_route_me_harder(pskb, RTN_UNSPEC);
 	}
 	return 0;
 }
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 021395b674639..d85d2de504497 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -265,7 +265,8 @@ ip_nat_local_fn(unsigned int hooknum,
 		       ct->tuplehash[!dir].tuple.src.u.all
 #endif
 		    )
-			return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+			if (ip_route_me_harder(pskb, RTN_UNSPEC))
+				ret = NF_DROP;
 	}
 	return ret;
 }
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index e62ea2bb9c0ac..b91f3582359bf 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -157,7 +157,8 @@ ipt_local_hook(unsigned int hook,
 		|| (*pskb)->nfmark != nfmark
 #endif
 		|| (*pskb)->nh.iph->tos != tos))
-		return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+		if (ip_route_me_harder(pskb, RTN_UNSPEC))
+			ret = NF_DROP;
 
 	return ret;
 }
-- 
cgit 1.2.3-korg


From 901eaf6c8f997f18ebc8fcbb85411c79161ab3b2 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Mon, 2 Oct 2006 16:11:51 -0700
Subject: [NETFILTER]: Honour source routing for LVS-NAT

For policy routing, packets originating from this machine itself may be
routed differently to packets passing through. We want this packet to be
routed as if it came from this machine itself. So re-compute the routing
information using ip_route_me_harder().

This patch is derived from work by Ken Brownfield

Cc: Ken Brownfield <krb@irridia.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipvs/ip_vs_core.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 6dee03935f786..1445bb47fea4b 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -813,6 +813,16 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 	skb->nh.iph->saddr = cp->vaddr;
 	ip_send_check(skb->nh.iph);
 
+ 	/* For policy routing, packets originating from this
+ 	 * machine itself may be routed differently to packets
+ 	 * passing through.  We want this packet to be routed as
+ 	 * if it came from this machine itself.  So re-compute
+ 	 * the routing information.
+ 	 */
+ 	if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
+ 		goto drop;
+	skb = *pskb;
+
 	IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
 
 	ip_vs_out_stats(cp, skb);
-- 
cgit 1.2.3-korg


From 9d02002d2dc2c7423e5891b97727fde4d667adf1 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 2 Oct 2006 16:12:20 -0700
Subject: [NETFILTER]: ipt_REJECT: remove largely duplicate route_reverse
 function

Use ip_route_me_harder instead, which now allows to specify how we wish
the packet to be routed.

Based on patch by Simon Horman <horms@verge.net.au>.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_REJECT.c | 97 ++++++++---------------------------------
 1 file changed, 19 insertions(+), 78 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index fd0c05efed8a0..ad0312d0e4fd6 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -38,76 +38,16 @@ MODULE_DESCRIPTION("iptables REJECT target module");
 #define DEBUGP(format, args...)
 #endif
 
-static inline struct rtable *route_reverse(struct sk_buff *skb, 
-					   struct tcphdr *tcph, int hook)
-{
-	struct iphdr *iph = skb->nh.iph;
-	struct dst_entry *odst;
-	struct flowi fl = {};
-	struct rtable *rt;
-
-	/* We don't require ip forwarding to be enabled to be able to
-	 * send a RST reply for bridged traffic. */
-	if (hook != NF_IP_FORWARD
-#ifdef CONFIG_BRIDGE_NETFILTER
-	    || (skb->nf_bridge && skb->nf_bridge->mask & BRNF_BRIDGED)
-#endif
-	   ) {
-		fl.nl_u.ip4_u.daddr = iph->saddr;
-		if (hook == NF_IP_LOCAL_IN)
-			fl.nl_u.ip4_u.saddr = iph->daddr;
-		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
-
-		if (ip_route_output_key(&rt, &fl) != 0)
-			return NULL;
-	} else {
-		/* non-local src, find valid iif to satisfy
-		 * rp-filter when calling ip_route_input. */
-		fl.nl_u.ip4_u.daddr = iph->daddr;
-		if (ip_route_output_key(&rt, &fl) != 0)
-			return NULL;
-
-		odst = skb->dst;
-		if (ip_route_input(skb, iph->saddr, iph->daddr,
-		                   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
-			dst_release(&rt->u.dst);
-			return NULL;
-		}
-		dst_release(&rt->u.dst);
-		rt = (struct rtable *)skb->dst;
-		skb->dst = odst;
-
-		fl.nl_u.ip4_u.daddr = iph->saddr;
-		fl.nl_u.ip4_u.saddr = iph->daddr;
-		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
-	}
-
-	if (rt->u.dst.error) {
-		dst_release(&rt->u.dst);
-		return NULL;
-	}
-
-	fl.proto = IPPROTO_TCP;
-	fl.fl_ip_sport = tcph->dest;
-	fl.fl_ip_dport = tcph->source;
-	security_skb_classify_flow(skb, &fl);
-
-	xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
-
-	return rt;
-}
-
 /* Send RST reply */
 static void send_reset(struct sk_buff *oldskb, int hook)
 {
 	struct sk_buff *nskb;
 	struct iphdr *iph = oldskb->nh.iph;
 	struct tcphdr _otcph, *oth, *tcph;
-	struct rtable *rt;
 	__be16 tmp_port;
 	__be32 tmp_addr;
 	int needs_ack;
-	int hh_len;
+	unsigned int addr_type;
 
 	/* IP header checks: fragment. */
 	if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
@@ -126,23 +66,13 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP))
 		return;
 
-	if ((rt = route_reverse(oldskb, oth, hook)) == NULL)
-		return;
-
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
-
 	/* We need a linear, writeable skb.  We also need to expand
 	   headroom in case hh_len of incoming interface < hh_len of
 	   outgoing interface */
-	nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb),
+	nskb = skb_copy_expand(oldskb, LL_MAX_HEADER, skb_tailroom(oldskb),
 			       GFP_ATOMIC);
-	if (!nskb) {
-		dst_release(&rt->u.dst);
+	if (!nskb)
 		return;
-	}
-
-	dst_release(nskb->dst);
-	nskb->dst = &rt->u.dst;
 
 	/* This packet will not be the same as the other: clear nf fields */
 	nf_reset(nskb);
@@ -184,6 +114,21 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	tcph->window = 0;
 	tcph->urg_ptr = 0;
 
+	/* Set DF, id = 0 */
+	nskb->nh.iph->frag_off = htons(IP_DF);
+	nskb->nh.iph->id = 0;
+
+	addr_type = RTN_UNSPEC;
+	if (hook != NF_IP_FORWARD
+#ifdef CONFIG_BRIDGE_NETFILTER
+	    || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED)
+#endif
+	   )
+		addr_type = RTN_LOCAL;
+
+	if (ip_route_me_harder(&nskb, addr_type))
+		goto free_nskb;
+
 	/* Adjust TCP checksum */
 	nskb->ip_summed = CHECKSUM_NONE;
 	tcph->check = 0;
@@ -192,12 +137,8 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 				   nskb->nh.iph->daddr,
 				   csum_partial((char *)tcph,
 						sizeof(struct tcphdr), 0));
-
-	/* Adjust IP TTL, DF */
+	/* Adjust IP TTL */
 	nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
-	/* Set DF, id = 0 */
-	nskb->nh.iph->frag_off = htons(IP_DF);
-	nskb->nh.iph->id = 0;
 
 	/* Adjust IP checksum */
 	nskb->nh.iph->check = 0;
-- 
cgit 1.2.3-korg


From b18dfa90c008850e0f3bfd63638dd8fbe8e08701 Mon Sep 17 00:00:00 2001
From: Bart De Schuymer <bdschuym@pandora.be>
Date: Mon, 2 Oct 2006 16:12:52 -0700
Subject: [NETFILTER]: ebt_mark: add or/and/xor action support to mark target

The following patch adds or/and/xor functionality for the mark target,
while staying backwards compatible.

Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_bridge/ebt_mark_t.h | 12 ++++++++++++
 net/bridge/netfilter/ebt_mark.c             | 21 +++++++++++++++++----
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/include/linux/netfilter_bridge/ebt_mark_t.h b/include/linux/netfilter_bridge/ebt_mark_t.h
index 110fec6a40a2b..6270f6f336936 100644
--- a/include/linux/netfilter_bridge/ebt_mark_t.h
+++ b/include/linux/netfilter_bridge/ebt_mark_t.h
@@ -1,6 +1,18 @@
 #ifndef __LINUX_BRIDGE_EBT_MARK_T_H
 #define __LINUX_BRIDGE_EBT_MARK_T_H
 
+/* The target member is reused for adding new actions, the
+ * value of the real target is -1 to -NUM_STANDARD_TARGETS.
+ * For backward compatibility, the 4 lsb (2 would be enough,
+ * but let's play it safe) are kept to designate this target.
+ * The remaining bits designate the action. By making the set
+ * action 0xfffffff0, the result will look ok for older
+ * versions. [September 2006] */
+#define MARK_SET_VALUE (0xfffffff0)
+#define MARK_OR_VALUE  (0xffffffe0)
+#define MARK_AND_VALUE (0xffffffd0)
+#define MARK_XOR_VALUE (0xffffffc0)
+
 struct ebt_mark_t_info
 {
 	unsigned long mark;
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 770c0df972a3b..b54306a934e58 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -22,24 +22,37 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
    const void *data, unsigned int datalen)
 {
 	struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data;
+	int action = info->target & -16;
 
-	if ((*pskb)->nfmark != info->mark)
+	if (action == MARK_SET_VALUE)
 		(*pskb)->nfmark = info->mark;
+	else if (action == MARK_OR_VALUE)
+		(*pskb)->nfmark |= info->mark;
+	else if (action == MARK_AND_VALUE)
+		(*pskb)->nfmark &= info->mark;
+	else
+		(*pskb)->nfmark ^= info->mark;
 
-	return info->target;
+	return info->target | -16;
 }
 
 static int ebt_target_mark_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data;
+	int tmp;
 
 	if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info)))
 		return -EINVAL;
-	if (BASE_CHAIN && info->target == EBT_RETURN)
+	tmp = info->target | -16;
+	if (BASE_CHAIN && tmp == EBT_RETURN)
 		return -EINVAL;
 	CLEAR_BASE_CHAIN_BIT;
-	if (INVALID_TARGET)
+	if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
+		return -EINVAL;
+	tmp = info->target & -16;
+	if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE &&
+	    tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE)
 		return -EINVAL;
 	return 0;
 }
-- 
cgit 1.2.3-korg


From 81771b3b20fb4e98c6f2b2aac2bc10ed41a8f006 Mon Sep 17 00:00:00 2001
From: Ismail Donmez <ismail@pardus.org.tr>
Date: Tue, 3 Oct 2006 13:49:10 -0700
Subject: [NET_SCHED]: Revert "HTB: fix incorrect use of RB_EMPTY_NODE"

With commit 10fd48f2376db52f08bf0420d2c4f580e39269e1 [1] ,  RB_EMPTY_NODE
changed behaviour so it returns true when the node is empty as expected.
Hence Patrick McHardy's fix for sched_htb.c should be reverted.

Signed-off-by: Ismail Donmez <ismail@pardus.org.tr>
ACKed-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 6c058e3660c0c..bb3ddd4784b1c 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -391,7 +391,7 @@ static inline void htb_add_class_to_row(struct htb_sched *q,
 /* If this triggers, it is a bug in this code, but it need not be fatal */
 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
 {
-	if (!RB_EMPTY_NODE(rb)) {
+	if (RB_EMPTY_NODE(rb)) {
 		WARN_ON(1);
 	} else {
 		rb_erase(rb, root);
-- 
cgit 1.2.3-korg


From 132a55f3c5c0b1a364d32f65595ad8838c30a60e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 3 Oct 2006 14:34:00 -0700
Subject: [UDP6]: Fix flowi clobbering

The udp6_sendmsg function uses a shared buffer to store the
flow without taking any locks.  This leads to races with SMP.
This patch moves the flowi object onto the stack.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 62 +++++++++++++++++++++++++++++-----------------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9662561701d1c..552ec0f449af1 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -546,7 +546,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	struct in6_addr *daddr, *final_p = NULL, final;
 	struct ipv6_txoptions *opt = NULL;
 	struct ip6_flowlabel *flowlabel = NULL;
-	struct flowi *fl = &inet->cork.fl;
+	struct flowi fl;
 	struct dst_entry *dst;
 	int addr_len = msg->msg_namelen;
 	int ulen = len;
@@ -626,19 +626,19 @@ do_udp_sendmsg:
 	}
 	ulen += sizeof(struct udphdr);
 
-	memset(fl, 0, sizeof(*fl));
+	memset(&fl, 0, sizeof(fl));
 
 	if (sin6) {
 		if (sin6->sin6_port == 0)
 			return -EINVAL;
 
-		fl->fl_ip_dport = sin6->sin6_port;
+		fl.fl_ip_dport = sin6->sin6_port;
 		daddr = &sin6->sin6_addr;
 
 		if (np->sndflow) {
-			fl->fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
-			if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
-				flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
+			fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+			if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+				flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
 				if (flowlabel == NULL)
 					return -EINVAL;
 				daddr = &flowlabel->dst;
@@ -656,32 +656,32 @@ do_udp_sendmsg:
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    sin6->sin6_scope_id &&
 		    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
-			fl->oif = sin6->sin6_scope_id;
+			fl.oif = sin6->sin6_scope_id;
 	} else {
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
 
-		fl->fl_ip_dport = inet->dport;
+		fl.fl_ip_dport = inet->dport;
 		daddr = &np->daddr;
-		fl->fl6_flowlabel = np->flow_label;
+		fl.fl6_flowlabel = np->flow_label;
 		connected = 1;
 	}
 
-	if (!fl->oif)
-		fl->oif = sk->sk_bound_dev_if;
+	if (!fl.oif)
+		fl.oif = sk->sk_bound_dev_if;
 
 	if (msg->msg_controllen) {
 		opt = &opt_space;
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(*opt);
 
-		err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass);
+		err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
 		}
-		if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
-			flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
+		if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
 			if (flowlabel == NULL)
 				return -EINVAL;
 		}
@@ -695,39 +695,39 @@ do_udp_sendmsg:
 		opt = fl6_merge_options(&opt_space, flowlabel, opt);
 	opt = ipv6_fixup_options(&opt_space, opt);
 
-	fl->proto = IPPROTO_UDP;
-	ipv6_addr_copy(&fl->fl6_dst, daddr);
-	if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr))
-		ipv6_addr_copy(&fl->fl6_src, &np->saddr);
-	fl->fl_ip_sport = inet->sport;
+	fl.proto = IPPROTO_UDP;
+	ipv6_addr_copy(&fl.fl6_dst, daddr);
+	if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
+		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+	fl.fl_ip_sport = inet->sport;
 	
 	/* merge ip6_build_xmit from ip6_output */
 	if (opt && opt->srcrt) {
 		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-		ipv6_addr_copy(&final, &fl->fl6_dst);
-		ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
+		ipv6_addr_copy(&final, &fl.fl6_dst);
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 		final_p = &final;
 		connected = 0;
 	}
 
-	if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) {
-		fl->oif = np->mcast_oif;
+	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) {
+		fl.oif = np->mcast_oif;
 		connected = 0;
 	}
 
-	security_sk_classify_flow(sk, fl);
+	security_sk_classify_flow(sk, &fl);
 
-	err = ip6_sk_dst_lookup(sk, &dst, fl);
+	err = ip6_sk_dst_lookup(sk, &dst, &fl);
 	if (err)
 		goto out;
 	if (final_p)
-		ipv6_addr_copy(&fl->fl6_dst, final_p);
+		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0)
+	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
 		goto out;
 
 	if (hlimit < 0) {
-		if (ipv6_addr_is_multicast(&fl->fl6_dst))
+		if (ipv6_addr_is_multicast(&fl.fl6_dst))
 			hlimit = np->mcast_hops;
 		else
 			hlimit = np->hop_limit;
@@ -763,7 +763,7 @@ back_from_confirm:
 do_append_data:
 	up->len += ulen;
 	err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen,
-		sizeof(struct udphdr), hlimit, tclass, opt, fl,
+		sizeof(struct udphdr), hlimit, tclass, opt, &fl,
 		(struct rt6_info*)dst,
 		corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
 	if (err)
@@ -774,10 +774,10 @@ do_append_data:
 	if (dst) {
 		if (connected) {
 			ip6_dst_store(sk, dst,
-				      ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ?
+				      ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
 				      &np->daddr : NULL,
 #ifdef CONFIG_IPV6_SUBTREES
-				      ipv6_addr_equal(&fl->fl6_src, &np->saddr) ?
+				      ipv6_addr_equal(&fl.fl6_src, &np->saddr) ?
 				      &np->saddr :
 #endif
 				      NULL);
-- 
cgit 1.2.3-korg


From 1e0c14f49d6b393179f423abbac47f85618d3d46 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 3 Oct 2006 14:35:49 -0700
Subject: [UDP]: Fix MSG_PROBE crash

UDP tracks corking status through the pending variable.  The
IP layer also tracks it through the socket write queue.  It
is possible for the two to get out of sync when MSG_PROBE is
used.

This patch changes UDP to check the write queue to ensure
that the two stay in sync.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 2 ++
 net/ipv6/udp.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6d6142f9c478b..865d75214a9ab 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -675,6 +675,8 @@ do_append_data:
 		udp_flush_pending_frames(sk);
 	else if (!corkreq)
 		err = udp_push_pending_frames(sk, up);
+	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
+		up->pending = 0;
 	release_sock(sk);
 
 out:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 552ec0f449af1..e0c3934a7e4bd 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -770,6 +770,8 @@ do_append_data:
 		udp_v6_flush_pending_frames(sk);
 	else if (!corkreq)
 		err = udp_v6_push_pending_frames(sk, up);
+	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
+		up->pending = 0;
 
 	if (dst) {
 		if (connected) {
-- 
cgit 1.2.3-korg


From c5e29460f5f9eb189cab5d9fdaa137e64f7734b6 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Tue, 3 Oct 2006 15:49:46 -0700
Subject: [NEIGH]: always use hash_mask under tbl lock

Make sure hash_mask is protected with tbl->lock in all cases just like
the hash_buckets.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8ce8c471d8687..b4b478353b27e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -344,12 +344,12 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
 {
 	struct neighbour *n;
 	int key_len = tbl->key_len;
-	u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
+	u32 hash_val = tbl->hash(pkey, dev);
 	
 	NEIGH_CACHE_STAT_INC(tbl, lookups);
 
 	read_lock_bh(&tbl->lock);
-	for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
+	for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
 		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
 			neigh_hold(n);
 			NEIGH_CACHE_STAT_INC(tbl, hits);
@@ -364,12 +364,12 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
 {
 	struct neighbour *n;
 	int key_len = tbl->key_len;
-	u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask;
+	u32 hash_val = tbl->hash(pkey, NULL);
 
 	NEIGH_CACHE_STAT_INC(tbl, lookups);
 
 	read_lock_bh(&tbl->lock);
-	for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
+	for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
 		if (!memcmp(n->primary_key, pkey, key_len)) {
 			neigh_hold(n);
 			NEIGH_CACHE_STAT_INC(tbl, hits);
@@ -1998,12 +1998,12 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 	int rc, h, s_h = cb->args[1];
 	int idx, s_idx = idx = cb->args[2];
 
+	read_lock_bh(&tbl->lock);
 	for (h = 0; h <= tbl->hash_mask; h++) {
 		if (h < s_h)
 			continue;
 		if (h > s_h)
 			s_idx = 0;
-		read_lock_bh(&tbl->lock);
 		for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
 			if (idx < s_idx)
 				continue;
@@ -2016,8 +2016,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 				goto out;
 			}
 		}
-		read_unlock_bh(&tbl->lock);
 	}
+	read_unlock_bh(&tbl->lock);
 	rc = skb->len;
 out:
 	cb->args[1] = h;
-- 
cgit 1.2.3-korg


From 667bbcb6c099d1b74f95c6963ddf37a32e7afc29 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Tue, 3 Oct 2006 15:56:09 -0700
Subject: [XFRM] STATE: Use destination address for src hash.

Src hash is introduced for Mobile IPv6 route optimization usage.
On current kenrel code it is calculated with source address only.
It results we uses the same hash value for outbound state (when
the node has only one address for Mobile IPv6).
This patch use also destination address as peer information for
src hash to be dispersed.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_hash.h  |  7 ++++---
 net/xfrm/xfrm_state.c | 16 +++++++++-------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index 6ac4e4f033aca..d401dc8f05ed4 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -41,17 +41,18 @@ static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t
 	return (h ^ (h >> 16)) & hmask;
 }
 
-static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr,
+static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr,
+				       xfrm_address_t *saddr,
 				       unsigned short family,
 				       unsigned int hmask)
 {
 	unsigned int h = family;
 	switch (family) {
 	case AF_INET:
-		h ^= __xfrm4_addr_hash(saddr);
+		h ^= __xfrm4_daddr_saddr_hash(daddr, saddr);
 		break;
 	case AF_INET6:
-		h ^= __xfrm6_addr_hash(saddr);
+		h ^= __xfrm6_daddr_saddr_hash(daddr, saddr);
 		break;
 	};
 	return (h ^ (h >> 16)) & hmask;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index f927b7330f025..39b8bf3a9ded2 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -63,10 +63,11 @@ static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
 	return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
 }
 
-static inline unsigned int xfrm_src_hash(xfrm_address_t *addr,
+static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
+					 xfrm_address_t *saddr,
 					 unsigned short family)
 {
-	return __xfrm_src_hash(addr, family, xfrm_state_hmask);
+	return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
 }
 
 static inline unsigned int
@@ -92,7 +93,8 @@ static void xfrm_hash_transfer(struct hlist_head *list,
 				    nhashmask);
 		hlist_add_head(&x->bydst, ndsttable+h);
 
-		h = __xfrm_src_hash(&x->props.saddr, x->props.family,
+		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
+				    x->props.family,
 				    nhashmask);
 		hlist_add_head(&x->bysrc, nsrctable+h);
 
@@ -458,7 +460,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi,
 
 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
 {
-	unsigned int h = xfrm_src_hash(saddr, family);
+	unsigned int h = xfrm_src_hash(daddr, saddr, family);
 	struct xfrm_state *x;
 	struct hlist_node *entry;
 
@@ -587,7 +589,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		if (km_query(x, tmpl, pol) == 0) {
 			x->km.state = XFRM_STATE_ACQ;
 			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
-			h = xfrm_src_hash(saddr, family);
+			h = xfrm_src_hash(daddr, saddr, family);
 			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
 			if (x->id.spi) {
 				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
@@ -622,7 +624,7 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 			  x->props.reqid, x->props.family);
 	hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 
-	h = xfrm_src_hash(&x->props.saddr, x->props.family);
+	h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
 	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
 
 	if (x->id.spi) {
@@ -748,7 +750,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 		x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
 		add_timer(&x->timer);
 		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
-		h = xfrm_src_hash(saddr, family);
+		h = xfrm_src_hash(daddr, saddr, family);
 		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
 		wake_up(&km_waitq);
 	}
-- 
cgit 1.2.3-korg


From ae8c05779ac2f286b872db9ebea0c3c0a031ad1e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 3 Oct 2006 16:00:26 -0700
Subject: [XFRM]: Clearing xfrm_policy_count[] to zero during flush is
 incorrect.

When we flush policies, we do a type match so we might not
actually delete all policies matching a certain direction.

So keep track of how many policies we actually kill and
subtract that number from xfrm_policy_count[dir] at the
end.

Based upon a patch by Masahide NAKAMURA.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b6e2e79d72612..2a7861661f14e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -778,8 +778,9 @@ void xfrm_policy_flush(u8 type)
 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
 		struct xfrm_policy *pol;
 		struct hlist_node *entry;
-		int i;
+		int i, killed;
 
+		killed = 0;
 	again1:
 		hlist_for_each_entry(pol, entry,
 				     &xfrm_policy_inexact[dir], bydst) {
@@ -790,6 +791,7 @@ void xfrm_policy_flush(u8 type)
 			write_unlock_bh(&xfrm_policy_lock);
 
 			xfrm_policy_kill(pol);
+			killed++;
 
 			write_lock_bh(&xfrm_policy_lock);
 			goto again1;
@@ -807,13 +809,14 @@ void xfrm_policy_flush(u8 type)
 				write_unlock_bh(&xfrm_policy_lock);
 
 				xfrm_policy_kill(pol);
+				killed++;
 
 				write_lock_bh(&xfrm_policy_lock);
 				goto again2;
 			}
 		}
 
-		xfrm_policy_count[dir] = 0;
+		xfrm_policy_count[dir] -= killed;
 	}
 	atomic_inc(&flow_cache_genid);
 	write_unlock_bh(&xfrm_policy_lock);
-- 
cgit 1.2.3-korg


From 617dbeaa3f2987acc83c1149409685005e9dd740 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Tue, 3 Oct 2006 16:25:34 -0700
Subject: [TIPC]: fix printk warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gcc spits out this warning:

net/tipc/link.c: In function ‘link_retransmit_failure’:
net/tipc/link.c:1669: warning: cast from pointer to integer of different
size

More than a little bit ugly, storing integers in void*, but at least the
code is correct, unlike some of the more crufty Linux kernel code found
elsewhere.

Rather than having two casts to massage the value into u32, it's easier
just to have a single cast and use "%lu", since it's just a printk.

Signed-off-by: Jeff Garzik <jeff@garzik.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 693f02eca6d68..53bc8cb5adbc7 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1666,8 +1666,9 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
 		char addr_string[16];
 
 		tipc_printf(TIPC_OUTPUT, "Msg seq number: %u,  ", msg_seqno(msg));
-		tipc_printf(TIPC_OUTPUT, "Outstanding acks: %u\n", (u32)TIPC_SKB_CB(buf)->handle);
-		
+		tipc_printf(TIPC_OUTPUT, "Outstanding acks: %lu\n",
+				     (unsigned long) TIPC_SKB_CB(buf)->handle);
+
 		n_ptr = l_ptr->owner->next;
 		tipc_node_lock(n_ptr);
 
-- 
cgit 1.2.3-korg


From 0c1cca1d8e0d58775dad43374f925e6cddf1bebc Mon Sep 17 00:00:00 2001
From: Om Narasimhan <om.turyx@gmail.com>
Date: Tue, 3 Oct 2006 16:27:18 -0700
Subject: [ATM]: kmalloc to kzalloc patches for drivers/atm

Signed-off-by: Om Narasimhan <om.turyx@gmail.com>
Signed-off-by: Chas Williams <chas@cmf.nrl.navy.mil>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/adummy.c     |  6 ++----
 drivers/atm/firestream.c | 12 +++---------
 drivers/atm/he.c         |  4 +---
 drivers/atm/horizon.c    |  4 +---
 drivers/atm/idt77252.c   | 23 ++++++-----------------
 drivers/atm/lanai.c      |  8 +-------
 drivers/atm/zatm.c       |  6 ++----
 7 files changed, 16 insertions(+), 47 deletions(-)

diff --git a/drivers/atm/adummy.c b/drivers/atm/adummy.c
index 6cc93de0b71d1..ac2c10822be07 100644
--- a/drivers/atm/adummy.c
+++ b/drivers/atm/adummy.c
@@ -113,15 +113,13 @@ static int __init adummy_init(void)
 
 	printk(KERN_ERR "adummy: version %s\n", DRV_VERSION);
 
-	adummy_dev = (struct adummy_dev *) kmalloc(sizeof(struct adummy_dev),
+	adummy_dev = kzalloc(sizeof(struct adummy_dev),
 						   GFP_KERNEL);
 	if (!adummy_dev) {
-		printk(KERN_ERR DEV_LABEL ": kmalloc() failed\n");
+		printk(KERN_ERR DEV_LABEL ": kzalloc() failed\n");
 		err = -ENOMEM;
 		goto out;
 	}
-	memset(adummy_dev, 0, sizeof(struct adummy_dev));
-
 	atm_dev = atm_dev_register(DEV_LABEL, &adummy_ops, -1, NULL);
 	if (!atm_dev) {
 		printk(KERN_ERR DEV_LABEL ": atm_dev_register() failed\n");
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c
index 38fc054bd6715..5f25e5efefcd7 100644
--- a/drivers/atm/firestream.c
+++ b/drivers/atm/firestream.c
@@ -1784,7 +1784,7 @@ static int __devinit fs_init (struct fs_dev *dev)
 		write_fs (dev, RAM, (1 << (28 - FS155_VPI_BITS - FS155_VCI_BITS)) - 1);
 		dev->nchannels = FS155_NR_CHANNELS;
 	}
-	dev->atm_vccs = kmalloc (dev->nchannels * sizeof (struct atm_vcc *), 
+	dev->atm_vccs = kcalloc (dev->nchannels, sizeof (struct atm_vcc *),
 				 GFP_KERNEL);
 	fs_dprintk (FS_DEBUG_ALLOC, "Alloc atmvccs: %p(%Zd)\n",
 		    dev->atm_vccs, dev->nchannels * sizeof (struct atm_vcc *));
@@ -1794,9 +1794,8 @@ static int __devinit fs_init (struct fs_dev *dev)
 		/* XXX Clean up..... */
 		return 1;
 	}
-	memset (dev->atm_vccs, 0, dev->nchannels * sizeof (struct atm_vcc *));
 
-	dev->tx_inuse = kmalloc (dev->nchannels / 8 /* bits/byte */ , GFP_KERNEL);
+	dev->tx_inuse = kzalloc (dev->nchannels / 8 /* bits/byte */ , GFP_KERNEL);
 	fs_dprintk (FS_DEBUG_ALLOC, "Alloc tx_inuse: %p(%d)\n", 
 		    dev->atm_vccs, dev->nchannels / 8);
 
@@ -1805,8 +1804,6 @@ static int __devinit fs_init (struct fs_dev *dev)
 		/* XXX Clean up..... */
 		return 1;
 	}
-	memset (dev->tx_inuse, 0, dev->nchannels / 8);
-
 	/* -- RAS1 : FS155 and 50 differ. Default (0) should be OK for both */
 	/* -- RAS2 : FS50 only: Default is OK. */
 
@@ -1893,14 +1890,11 @@ static int __devinit firestream_init_one (struct pci_dev *pci_dev,
 	if (pci_enable_device(pci_dev)) 
 		goto err_out;
 
-	fs_dev = kmalloc (sizeof (struct fs_dev), GFP_KERNEL);
+	fs_dev = kzalloc (sizeof (struct fs_dev), GFP_KERNEL);
 	fs_dprintk (FS_DEBUG_ALLOC, "Alloc fs-dev: %p(%Zd)\n",
 		    fs_dev, sizeof (struct fs_dev));
 	if (!fs_dev)
 		goto err_out;
-
-	memset (fs_dev, 0, sizeof (struct fs_dev));
-  
 	atm_dev = atm_dev_register("fs", &ops, -1, NULL);
 	if (!atm_dev)
 		goto err_out_free_fs_dev;
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index f2511b42dba20..b22a9142b2406 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -383,14 +383,12 @@ he_init_one(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent)
 	}
 	pci_set_drvdata(pci_dev, atm_dev);
 
-	he_dev = (struct he_dev *) kmalloc(sizeof(struct he_dev),
+	he_dev = kzalloc(sizeof(struct he_dev),
 							GFP_KERNEL);
 	if (!he_dev) {
 		err = -ENOMEM;
 		goto init_one_failure;
 	}
-	memset(he_dev, 0, sizeof(struct he_dev));
-
 	he_dev->pci_dev = pci_dev;
 	he_dev->atm_dev = atm_dev;
 	he_dev->atm_dev->dev_data = he_dev;
diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c
index d1113e845f953..209dba1c70da5 100644
--- a/drivers/atm/horizon.c
+++ b/drivers/atm/horizon.c
@@ -2719,7 +2719,7 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_
 		goto out_disable;
 	}
 
-	dev = kmalloc(sizeof(hrz_dev), GFP_KERNEL);
+	dev = kzalloc(sizeof(hrz_dev), GFP_KERNEL);
 	if (!dev) {
 		// perhaps we should be nice: deregister all adapters and abort?
 		PRINTD(DBG_ERR, "out of memory");
@@ -2727,8 +2727,6 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_
 		goto out_release;
 	}
 
-	memset(dev, 0, sizeof(hrz_dev));
-
 	pci_set_drvdata(pci_dev, dev);
 
 	// grab IRQ and install handler - move this someplace more sensible
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index b0369bb20f085..7487f0ad68e9c 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -642,11 +642,9 @@ alloc_scq(struct idt77252_dev *card, int class)
 {
 	struct scq_info *scq;
 
-	scq = (struct scq_info *) kmalloc(sizeof(struct scq_info), GFP_KERNEL);
+	scq = kzalloc(sizeof(struct scq_info), GFP_KERNEL);
 	if (!scq)
 		return NULL;
-	memset(scq, 0, sizeof(struct scq_info));
-
 	scq->base = pci_alloc_consistent(card->pcidev, SCQ_SIZE,
 					 &scq->paddr);
 	if (scq->base == NULL) {
@@ -2142,11 +2140,9 @@ idt77252_init_est(struct vc_map *vc, int pcr)
 {
 	struct rate_estimator *est;
 
-	est = kmalloc(sizeof(struct rate_estimator), GFP_KERNEL);
+	est = kzalloc(sizeof(struct rate_estimator), GFP_KERNEL);
 	if (!est)
 		return NULL;
-	memset(est, 0, sizeof(*est));
-
 	est->maxcps = pcr < 0 ? -pcr : pcr;
 	est->cps = est->maxcps;
 	est->avcps = est->cps << 5;
@@ -2451,14 +2447,12 @@ idt77252_open(struct atm_vcc *vcc)
 
 	index = VPCI2VC(card, vpi, vci);
 	if (!card->vcs[index]) {
-		card->vcs[index] = kmalloc(sizeof(struct vc_map), GFP_KERNEL);
+		card->vcs[index] = kzalloc(sizeof(struct vc_map), GFP_KERNEL);
 		if (!card->vcs[index]) {
 			printk("%s: can't alloc vc in open()\n", card->name);
 			up(&card->mutex);
 			return -ENOMEM;
 		}
-		memset(card->vcs[index], 0, sizeof(struct vc_map));
-
 		card->vcs[index]->card = card;
 		card->vcs[index]->index = index;
 
@@ -2926,13 +2920,11 @@ open_card_oam(struct idt77252_dev *card)
 		for (vci = 3; vci < 5; vci++) {
 			index = VPCI2VC(card, vpi, vci);
 
-			vc = kmalloc(sizeof(struct vc_map), GFP_KERNEL);
+			vc = kzalloc(sizeof(struct vc_map), GFP_KERNEL);
 			if (!vc) {
 				printk("%s: can't alloc vc\n", card->name);
 				return -ENOMEM;
 			}
-			memset(vc, 0, sizeof(struct vc_map));
-
 			vc->index = index;
 			card->vcs[index] = vc;
 
@@ -2995,12 +2987,11 @@ open_card_ubr0(struct idt77252_dev *card)
 {
 	struct vc_map *vc;
 
-	vc = kmalloc(sizeof(struct vc_map), GFP_KERNEL);
+	vc = kzalloc(sizeof(struct vc_map), GFP_KERNEL);
 	if (!vc) {
 		printk("%s: can't alloc vc\n", card->name);
 		return -ENOMEM;
 	}
-	memset(vc, 0, sizeof(struct vc_map));
 	card->vcs[0] = vc;
 	vc->class = SCHED_UBR0;
 
@@ -3695,14 +3686,12 @@ idt77252_init_one(struct pci_dev *pcidev, const struct pci_device_id *id)
 		goto err_out_disable_pdev;
 	}
 
-	card = kmalloc(sizeof(struct idt77252_dev), GFP_KERNEL);
+	card = kzalloc(sizeof(struct idt77252_dev), GFP_KERNEL);
 	if (!card) {
 		printk("idt77252-%d: can't allocate private data\n", index);
 		err = -ENOMEM;
 		goto err_out_disable_pdev;
 	}
-	memset(card, 0, sizeof(struct idt77252_dev));
-
 	card->revision = revision;
 	card->index = index;
 	card->pcidev = pcidev;
diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c
index fe60a59b7fc02..b9568e10965a0 100644
--- a/drivers/atm/lanai.c
+++ b/drivers/atm/lanai.c
@@ -1482,16 +1482,10 @@ static inline void vcc_table_deallocate(const struct lanai_dev *lanai)
 static inline struct lanai_vcc *new_lanai_vcc(void)
 {
 	struct lanai_vcc *lvcc;
-	lvcc = (struct lanai_vcc *) kmalloc(sizeof(*lvcc), GFP_KERNEL);
+	lvcc =  kzalloc(sizeof(*lvcc), GFP_KERNEL);
 	if (likely(lvcc != NULL)) {
-		lvcc->vbase = NULL;
-		lvcc->rx.atmvcc = lvcc->tx.atmvcc = NULL;
-		lvcc->nref = 0;
-		memset(&lvcc->stats, 0, sizeof lvcc->stats);
-		lvcc->rx.buf.start = lvcc->tx.buf.start = NULL;
 		skb_queue_head_init(&lvcc->tx.backlog);
 #ifdef DEBUG
-		lvcc->tx.unqueue = NULL;
 		lvcc->vci = -1;
 #endif
 	}
diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index 2c65e82f0d6b8..c491ec455cac4 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -603,9 +603,8 @@ static int start_rx(struct atm_dev *dev)
 DPRINTK("start_rx\n");
 	zatm_dev = ZATM_DEV(dev);
 	size = sizeof(struct atm_vcc *)*zatm_dev->chans;
-	zatm_dev->rx_map = (struct atm_vcc **) kmalloc(size,GFP_KERNEL);
+	zatm_dev->rx_map =  kzalloc(size,GFP_KERNEL);
 	if (!zatm_dev->rx_map) return -ENOMEM;
-	memset(zatm_dev->rx_map,0,size);
 	/* set VPI/VCI split (use all VCIs and give what's left to VPIs) */
 	zpokel(zatm_dev,(1 << dev->ci_range.vci_bits)-1,uPD98401_VRR);
 	/* prepare free buffer pools */
@@ -951,9 +950,8 @@ static int open_tx_first(struct atm_vcc *vcc)
 	skb_queue_head_init(&zatm_vcc->tx_queue);
 	init_waitqueue_head(&zatm_vcc->tx_wait);
 	/* initialize ring */
-	zatm_vcc->ring = kmalloc(RING_SIZE,GFP_KERNEL);
+	zatm_vcc->ring = kzalloc(RING_SIZE,GFP_KERNEL);
 	if (!zatm_vcc->ring) return -ENOMEM;
-	memset(zatm_vcc->ring,0,RING_SIZE);
 	loop = zatm_vcc->ring+RING_ENTRIES*RING_WORDS;
 	loop[0] = uPD98401_TXPD_V;
 	loop[1] = loop[2] = 0;
-- 
cgit 1.2.3-korg


From 3a4e5e2033eb7558ebe307e46f5fb6e63b92cab8 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Tue, 3 Oct 2006 16:27:55 -0700
Subject: [ATM]: [ambassador] Change the return type to reflect reality

Signed-off-by: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Chas Williams <chas@cmf.nrl.navy.mil>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/ambassador.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c
index 4521a249dd563..da599e6e9d34c 100644
--- a/drivers/atm/ambassador.c
+++ b/drivers/atm/ambassador.c
@@ -915,8 +915,8 @@ static irqreturn_t interrupt_handler(int irq, void *dev_id,
 
 /********** make rate (not quite as much fun as Horizon) **********/
 
-static unsigned int make_rate (unsigned int rate, rounding r,
-			       u16 * bits, unsigned int * actual) {
+static int make_rate (unsigned int rate, rounding r,
+		      u16 * bits, unsigned int * actual) {
   unsigned char exp = -1; // hush gcc
   unsigned int man = -1;  // hush gcc
   
-- 
cgit 1.2.3-korg


From 3e0c0ac84c01f41fb266bcdd2802708409901bed Mon Sep 17 00:00:00 2001
From: Chas Williams <chas@cmf.nrl.navy.mil>
Date: Tue, 3 Oct 2006 16:28:31 -0700
Subject: [ATM]: [zatm] always *pcr in alloc_shaper()

Signed-off-by: Chas Williams <chas@cmf.nrl.navy.mil>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/zatm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index c491ec455cac4..083c5d3f2e185 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -800,6 +800,7 @@ static int alloc_shaper(struct atm_dev *dev,int *pcr,int min,int max,int ubr)
 		i = m = 1;
 		zatm_dev->ubr_ref_cnt++;
 		zatm_dev->ubr = shaper;
+		*pcr = 0;
 	}
 	else {
 		if (min) {
-- 
cgit 1.2.3-korg


From 2473ffe3cae0f86341958e3cf962bb4fc261d028 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 Oct 2006 16:29:52 -0700
Subject: [NET_SCHED]: Remove old estimator implementation

Remove unused file, estimators live in net/core/gen_estimator.c now.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/estimator.c | 196 --------------------------------------------------
 1 file changed, 196 deletions(-)
 delete mode 100644 net/sched/estimator.c

diff --git a/net/sched/estimator.c b/net/sched/estimator.c
deleted file mode 100644
index 0ebc98e9be2d8..0000000000000
--- a/net/sched/estimator.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * net/sched/estimator.c	Simple rate estimator.
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
- * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/init.h>
-#include <net/sock.h>
-#include <net/pkt_sched.h>
-
-/*
-   This code is NOT intended to be used for statistics collection,
-   its purpose is to provide a base for statistical multiplexing
-   for controlled load service.
-   If you need only statistics, run a user level daemon which
-   periodically reads byte counters.
-
-   Unfortunately, rate estimation is not a very easy task.
-   F.e. I did not find a simple way to estimate the current peak rate
-   and even failed to formulate the problem 8)8)
-
-   So I preferred not to built an estimator into the scheduler,
-   but run this task separately.
-   Ideally, it should be kernel thread(s), but for now it runs
-   from timers, which puts apparent top bounds on the number of rated
-   flows, has minimal overhead on small, but is enough
-   to handle controlled load service, sets of aggregates.
-
-   We measure rate over A=(1<<interval) seconds and evaluate EWMA:
-
-   avrate = avrate*(1-W) + rate*W
-
-   where W is chosen as negative power of 2: W = 2^(-ewma_log)
-
-   The resulting time constant is:
-
-   T = A/(-ln(1-W))
-
-
-   NOTES.
-
-   * The stored value for avbps is scaled by 2^5, so that maximal
-     rate is ~1Gbit, avpps is scaled by 2^10.
-
-   * Minimal interval is HZ/4=250msec (it is the greatest common divisor
-     for HZ=100 and HZ=1024 8)), maximal interval
-     is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
-     are too expensive, longer ones can be implemented
-     at user level painlessly.
- */
-
-#define EST_MAX_INTERVAL	5
-
-struct qdisc_estimator
-{
-	struct qdisc_estimator	*next;
-	struct tc_stats		*stats;
-	spinlock_t		*stats_lock;
-	unsigned		interval;
-	int			ewma_log;
-	u64			last_bytes;
-	u32			last_packets;
-	u32			avpps;
-	u32			avbps;
-};
-
-struct qdisc_estimator_head
-{
-	struct timer_list	timer;
-	struct qdisc_estimator	*list;
-};
-
-static struct qdisc_estimator_head elist[EST_MAX_INTERVAL+1];
-
-/* Estimator array lock */
-static DEFINE_RWLOCK(est_lock);
-
-static void est_timer(unsigned long arg)
-{
-	int idx = (int)arg;
-	struct qdisc_estimator *e;
-
-	read_lock(&est_lock);
-	for (e = elist[idx].list; e; e = e->next) {
-		struct tc_stats *st = e->stats;
-		u64 nbytes;
-		u32 npackets;
-		u32 rate;
-
-		spin_lock(e->stats_lock);
-		nbytes = st->bytes;
-		npackets = st->packets;
-		rate = (nbytes - e->last_bytes)<<(7 - idx);
-		e->last_bytes = nbytes;
-		e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
-		st->bps = (e->avbps+0xF)>>5;
-
-		rate = (npackets - e->last_packets)<<(12 - idx);
-		e->last_packets = npackets;
-		e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
-		e->stats->pps = (e->avpps+0x1FF)>>10;
-		spin_unlock(e->stats_lock);
-	}
-
-	mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
-	read_unlock(&est_lock);
-}
-
-int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct rtattr *opt)
-{
-	struct qdisc_estimator *est;
-	struct tc_estimator *parm = RTA_DATA(opt);
-
-	if (RTA_PAYLOAD(opt) < sizeof(*parm))
-		return -EINVAL;
-
-	if (parm->interval < -2 || parm->interval > 3)
-		return -EINVAL;
-
-	est = kzalloc(sizeof(*est), GFP_KERNEL);
-	if (est == NULL)
-		return -ENOBUFS;
-
-	est->interval = parm->interval + 2;
-	est->stats = stats;
-	est->stats_lock = stats_lock;
-	est->ewma_log = parm->ewma_log;
-	est->last_bytes = stats->bytes;
-	est->avbps = stats->bps<<5;
-	est->last_packets = stats->packets;
-	est->avpps = stats->pps<<10;
-
-	est->next = elist[est->interval].list;
-	if (est->next == NULL) {
-		init_timer(&elist[est->interval].timer);
-		elist[est->interval].timer.data = est->interval;
-		elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
-		elist[est->interval].timer.function = est_timer;
-		add_timer(&elist[est->interval].timer);
-	}
-	write_lock_bh(&est_lock);
-	elist[est->interval].list = est;
-	write_unlock_bh(&est_lock);
-	return 0;
-}
-
-void qdisc_kill_estimator(struct tc_stats *stats)
-{
-	int idx;
-	struct qdisc_estimator *est, **pest;
-
-	for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
-		int killed = 0;
-		pest = &elist[idx].list;
-		while ((est=*pest) != NULL) {
-			if (est->stats != stats) {
-				pest = &est->next;
-				continue;
-			}
-
-			write_lock_bh(&est_lock);
-			*pest = est->next;
-			write_unlock_bh(&est_lock);
-
-			kfree(est);
-			killed++;
-		}
-		if (killed && elist[idx].list == NULL)
-			del_timer(&elist[idx].timer);
-	}
-}
-
-EXPORT_SYMBOL(qdisc_kill_estimator);
-EXPORT_SYMBOL(qdisc_new_estimator);
-- 
cgit 1.2.3-korg


From 80246ab36ec8baf7d107254adb166baa555a59f8 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 3 Oct 2006 16:49:53 -0700
Subject: [TCP]: Kill warning in tcp_clean_rtx_queue().

GCC can't tell we always initialize 'tv' in all the cases
we actually use it, so explicitly set it up with zeros.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3f884cea14ff4..cf06accbe687a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2259,7 +2259,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 	u32 pkts_acked = 0;
 	void (*rtt_sample)(struct sock *sk, u32 usrtt)
 		= icsk->icsk_ca_ops->rtt_sample;
-	struct timeval tv;
+	struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
 
 	while ((skb = skb_peek(&sk->sk_write_queue)) &&
 	       skb != sk->sk_send_head) {
-- 
cgit 1.2.3-korg


From 0a69452cb45add0841c2bc1e75c25f6bd4f1d8d9 Mon Sep 17 00:00:00 2001
From: Diego Beltrami <diego.beltrami@gmail.com>
Date: Tue, 3 Oct 2006 23:47:05 -0700
Subject: [XFRM]: BEET mode

This patch introduces the BEET mode (Bound End-to-End Tunnel) with as
specified by the ietf draft at the following link:

http://www.ietf.org/internet-drafts/draft-nikander-esp-beet-mode-06.txt

The patch provides only single family support (i.e. inner family =
outer family).

Signed-off-by: Diego Beltrami <diego.beltrami@gmail.com>
Signed-off-by: Miika Komu     <miika@iki.fi>
Signed-off-by: Herbert Xu     <herbert@gondor.apana.org.au>
Signed-off-by: Abhinav Pathak <abhinav.pathak@hiit.fi>
Signed-off-by: Jeff Ahrenholz <ahrenholz@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in.h         |   1 +
 include/linux/ip.h         |   9 +++
 include/linux/ipsec.h      |   3 +-
 include/linux/xfrm.h       |   3 +-
 net/ipv4/Kconfig           |   9 +++
 net/ipv4/Makefile          |   1 +
 net/ipv4/esp4.c            |  26 ++++++---
 net/ipv4/ipcomp.c          |   5 +-
 net/ipv4/xfrm4_mode_beet.c | 139 +++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/Kconfig           |  10 ++++
 net/ipv6/Makefile          |   1 +
 net/ipv6/ipcomp6.c         |   5 +-
 net/ipv6/xfrm6_mode_beet.c | 107 ++++++++++++++++++++++++++++++++++
 net/xfrm/xfrm_user.c       |   1 +
 14 files changed, 309 insertions(+), 11 deletions(-)
 create mode 100644 net/ipv4/xfrm4_mode_beet.c
 create mode 100644 net/ipv6/xfrm6_mode_beet.c

diff --git a/include/linux/in.h b/include/linux/in.h
index d79fc75fa7c2c..2619859f6e1b3 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -40,6 +40,7 @@ enum {
 
   IPPROTO_ESP = 50,            /* Encapsulation Security Payload protocol */
   IPPROTO_AH = 51,             /* Authentication Header protocol       */
+  IPPROTO_BEETPH = 94,	       /* IP option pseudo header for BEET */
   IPPROTO_PIM    = 103,		/* Protocol Independent Multicast	*/
 
   IPPROTO_COMP   = 108,                /* Compression Header protocol */
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 6b25d36fc54c4..ecee9bb27d0e6 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -80,6 +80,8 @@
 #define	IPOPT_TS_TSANDADDR	1		/* timestamps and addresses */
 #define	IPOPT_TS_PRESPEC	3		/* specified modules only */
 
+#define IPV4_BEET_PHMAXLEN 8
+
 struct iphdr {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
 	__u8	ihl:4,
@@ -123,4 +125,11 @@ struct ip_comp_hdr {
 	__be16 cpi;
 };
 
+struct ip_beet_phdr {
+	__u8 nexthdr;
+	__u8 hdrlen;
+	__u8 padlen;
+	__u8 reserved;
+};
+
 #endif	/* _LINUX_IP_H */
diff --git a/include/linux/ipsec.h b/include/linux/ipsec.h
index d3c527616b5e9..d17a6302a0e96 100644
--- a/include/linux/ipsec.h
+++ b/include/linux/ipsec.h
@@ -12,7 +12,8 @@
 enum {
 	IPSEC_MODE_ANY		= 0,	/* We do not support this for SA */
 	IPSEC_MODE_TRANSPORT	= 1,
-	IPSEC_MODE_TUNNEL	= 2
+	IPSEC_MODE_TUNNEL	= 2,
+	IPSEC_MODE_BEET         = 3
 };
 
 enum {
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 430afd0582697..8ae7f744917b0 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -129,7 +129,8 @@ enum
 #define XFRM_MODE_TUNNEL 1
 #define XFRM_MODE_ROUTEOPTIMIZATION 2
 #define XFRM_MODE_IN_TRIGGER 3
-#define XFRM_MODE_MAX 4
+#define XFRM_MODE_BEET 4
+#define XFRM_MODE_MAX 5
 
 /* Netlink configuration messages.  */
 enum {
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index d172a9804448d..5572071af735e 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -434,6 +434,15 @@ config INET_XFRM_MODE_TUNNEL
 
 	  If unsure, say Y.
 
+config INET_XFRM_MODE_BEET
+	tristate "IP: IPsec BEET mode"
+	default y
+	select XFRM
+	---help---
+	  Support for IPsec BEET mode.
+
+	  If unsure, say Y.
+
 config INET_DIAG
 	tristate "INET: socket monitoring interface"
 	default y
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f66049e28aebb..15645c51520cc 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_INET_AH) += ah4.o
 obj-$(CONFIG_INET_ESP) += esp4.o
 obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
 obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o
+obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o
 obj-$(CONFIG_INET_TUNNEL) += tunnel4.o
 obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
 obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 13b29360d102f..b5c205b576698 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -253,7 +253,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 		 *    as per draft-ietf-ipsec-udp-encaps-06,
 		 *    section 3.1.2
 		 */
-		if (x->props.mode == XFRM_MODE_TRANSPORT)
+		if (x->props.mode == XFRM_MODE_TRANSPORT ||
+		    x->props.mode == XFRM_MODE_BEET)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
@@ -271,17 +272,28 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
 {
 	struct esp_data *esp = x->data;
 	u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
-
-	if (x->props.mode == XFRM_MODE_TUNNEL) {
-		mtu = ALIGN(mtu + 2, blksize);
-	} else {
-		/* The worst case. */
+	int enclen = 0;
+
+	switch (x->props.mode) {
+	case XFRM_MODE_TUNNEL:
+		mtu = ALIGN(mtu +2, blksize);
+		break;
+	default:
+	case XFRM_MODE_TRANSPORT:
+		/* The worst case */
 		mtu = ALIGN(mtu + 2, 4) + blksize - 4;
+		break;
+	case XFRM_MODE_BEET:
+ 		/* The worst case. */
+		enclen = IPV4_BEET_PHMAXLEN;
+		mtu = ALIGN(mtu + enclen + 2, blksize);
+		break;
 	}
+
 	if (esp->conf.padlen)
 		mtu = ALIGN(mtu, esp->conf.padlen);
 
-	return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+	return mtu + x->props.header_len + esp->auth.icv_trunc_len - enclen;
 }
 
 static void esp4_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 2017d36024d48..3839b706142e3 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -206,6 +206,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
 static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
 {
 	struct xfrm_state *t;
+	u8 mode = XFRM_MODE_TUNNEL;
 	
 	t = xfrm_state_alloc();
 	if (t == NULL)
@@ -216,7 +217,9 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
 	t->id.daddr.a4 = x->id.daddr.a4;
 	memcpy(&t->sel, &x->sel, sizeof(t->sel));
 	t->props.family = AF_INET;
-	t->props.mode = XFRM_MODE_TUNNEL;
+	if (x->props.mode == XFRM_MODE_BEET)
+		mode = x->props.mode;
+	t->props.mode = mode;
 	t->props.saddr.a4 = x->props.saddr.a4;
 	t->props.flags = x->props.flags;
 
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
new file mode 100644
index 0000000000000..89cf59ea7bbe7
--- /dev/null
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -0,0 +1,139 @@
+/*
+ * xfrm4_mode_beet.c - BEET mode encapsulation for IPv4.
+ *
+ * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
+ *                    Miika Komu     <miika@iki.fi>
+ *                    Herbert Xu     <herbert@gondor.apana.org.au>
+ *                    Abhinav Pathak <abhinav.pathak@hiit.fi>
+ *                    Jeff Ahrenholz <ahrenholz@gmail.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/dst.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+
+/* Add encapsulation header.
+ *
+ * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
+ * The following fields in it shall be filled in by x->type->output:
+ *      tot_len
+ *      check
+ *
+ * On exit, skb->h will be set to the start of the payload to be processed
+ * by x->type->output and skb->nh will be set to the top IP header.
+ */
+static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct iphdr *iph, *top_iph = NULL;
+	int hdrlen, optlen;
+
+	iph = skb->nh.iph;
+	skb->h.ipiph = iph;
+
+	hdrlen = 0;
+	optlen = iph->ihl * 4 - sizeof(*iph);
+	if (unlikely(optlen))
+		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
+
+	skb->nh.raw = skb_push(skb, x->props.header_len + hdrlen);
+	top_iph = skb->nh.iph;
+	hdrlen = iph->ihl * 4 - optlen;
+	skb->h.raw += hdrlen;
+
+	memmove(top_iph, iph, hdrlen);
+	if (unlikely(optlen)) {
+		struct ip_beet_phdr *ph;
+
+		BUG_ON(optlen < 0);
+
+		ph = (struct ip_beet_phdr *)skb->h.raw;
+		ph->padlen = 4 - (optlen & 4);
+		ph->hdrlen = (optlen + ph->padlen + sizeof(*ph)) / 8;
+		ph->nexthdr = top_iph->protocol;
+
+		top_iph->protocol = IPPROTO_BEETPH;
+		top_iph->ihl = sizeof(struct iphdr) / 4;
+	}
+
+	top_iph->saddr = x->props.saddr.a4;
+	top_iph->daddr = x->id.daddr.a4;
+
+	return 0;
+}
+
+static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct iphdr *iph = skb->nh.iph;
+	int phlen = 0;
+	int optlen = 0;
+	__u8 ph_nexthdr = 0, protocol = 0;
+	int err = -EINVAL;
+
+	protocol = iph->protocol;
+
+	if (unlikely(iph->protocol == IPPROTO_BEETPH)) {
+		struct ip_beet_phdr *ph = (struct ip_beet_phdr*)(iph + 1);
+
+		if (!pskb_may_pull(skb, sizeof(*ph)))
+			goto out;
+
+		phlen = ph->hdrlen * 8;
+		optlen = phlen - ph->padlen - sizeof(*ph);
+		if (optlen < 0 || optlen & 3 || optlen > 250)
+			goto out;
+
+		if (!pskb_may_pull(skb, phlen))
+			goto out;
+
+		ph_nexthdr = ph->nexthdr;
+	}
+
+	skb_push(skb, sizeof(*iph) - phlen + optlen);
+	memmove(skb->data, skb->nh.raw, sizeof(*iph));
+	skb->nh.raw = skb->data;
+
+	iph = skb->nh.iph;
+	iph->ihl = (sizeof(*iph) + optlen) / 4;
+	iph->tot_len = htons(skb->len);
+	iph->daddr = x->sel.daddr.a4;
+	iph->saddr = x->sel.saddr.a4;
+	if (ph_nexthdr)
+		iph->protocol = ph_nexthdr;
+	else
+		iph->protocol = protocol;
+	iph->check = 0;
+	iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+	err = 0;
+out:
+	return err;
+}
+
+static struct xfrm_mode xfrm4_beet_mode = {
+	.input = xfrm4_beet_input,
+	.output = xfrm4_beet_output,
+	.owner = THIS_MODULE,
+	.encap = XFRM_MODE_BEET,
+};
+
+static int __init xfrm4_beet_init(void)
+{
+	return xfrm_register_mode(&xfrm4_beet_mode, AF_INET);
+}
+
+static void __exit xfrm4_beet_exit(void)
+{
+	int err;
+
+	err = xfrm_unregister_mode(&xfrm4_beet_mode, AF_INET);
+	BUG_ON(err);
+}
+
+module_init(xfrm4_beet_init);
+module_exit(xfrm4_beet_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_BEET);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index a2d211da2abac..a460e8132b4d4 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -136,6 +136,16 @@ config INET6_XFRM_MODE_TUNNEL
 
 	  If unsure, say Y.
 
+config INET6_XFRM_MODE_BEET
+	tristate "IPv6: IPsec BEET mode"
+	depends on IPV6
+	default IPV6
+	select XFRM
+	---help---
+	  Support for IPsec BEET mode.
+
+	  If unsure, say Y.
+
 config INET6_XFRM_MODE_ROUTEOPTIMIZATION
 	tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)"
 	depends on IPV6 && EXPERIMENTAL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 0213c6612b58d..87274e47fe327 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
 obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
 obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
 obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
+obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
 obj-$(CONFIG_NETFILTER)	+= netfilter/
 
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index a2860e35efd7d..71f59f18ede82 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -199,6 +199,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
 {
 	struct xfrm_state *t = NULL;
+	u8 mode = XFRM_MODE_TUNNEL;
 
 	t = xfrm_state_alloc();
 	if (!t)
@@ -212,7 +213,9 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
 	memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
 	memcpy(&t->sel, &x->sel, sizeof(t->sel));
 	t->props.family = AF_INET6;
-	t->props.mode = XFRM_MODE_TUNNEL;
+	if (x->props.mode == XFRM_MODE_BEET)
+		mode = x->props.mode;
+	t->props.mode = mode;
 	memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
 
 	if (xfrm_init_state(t))
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
new file mode 100644
index 0000000000000..edcfffa9e87b1
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -0,0 +1,107 @@
+/*
+ * xfrm6_mode_beet.c - BEET mode encapsulation for IPv6.
+ *
+ * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
+ *                    Miika Komu     <miika@iki.fi>
+ *                    Herbert Xu     <herbert@gondor.apana.org.au>
+ *                    Abhinav Pathak <abhinav.pathak@hiit.fi>
+ *                    Jeff Ahrenholz <ahrenholz@gmail.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/dsfield.h>
+#include <net/dst.h>
+#include <net/inet_ecn.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+/* Add encapsulation header.
+ *
+ * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
+ * The following fields in it shall be filled in by x->type->output:
+ *	payload_len
+ *
+ * On exit, skb->h will be set to the start of the encapsulation header to be
+ * filled in by x->type->output and skb->nh will be set to the nextheader field
+ * of the extension header directly preceding the encapsulation header, or in
+ * its absence, that of the top IP header.  The value of skb->data will always
+ * point to the top IP header.
+ */
+static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph, *top_iph;
+	u8 *prevhdr;
+	int hdr_len;
+
+	skb_push(skb, x->props.header_len);
+	iph = skb->nh.ipv6h;
+
+	hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
+	skb->nh.raw = prevhdr - x->props.header_len;
+	skb->h.raw = skb->data + hdr_len;
+	memmove(skb->data, iph, hdr_len);
+
+	skb->nh.raw = skb->data;
+	top_iph = skb->nh.ipv6h;
+	skb->nh.raw = &top_iph->nexthdr;
+	skb->h.ipv6h = top_iph + 1;
+
+	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
+	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
+
+	return 0;
+}
+
+static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *ip6h;
+	int size = sizeof(struct ipv6hdr);
+	int err = -EINVAL;
+
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+		goto out;
+
+	skb_push(skb, size);
+	memmove(skb->data, skb->nh.raw, size);
+	skb->nh.raw = skb->data;
+
+	skb->mac.raw = memmove(skb->data - skb->mac_len,
+			       skb->mac.raw, skb->mac_len);
+
+	ip6h = skb->nh.ipv6h;
+	ip6h->payload_len = htons(skb->len - size);
+	ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
+	ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6);
+	err = 0;
+out:
+	return err;
+}
+
+static struct xfrm_mode xfrm6_beet_mode = {
+	.input = xfrm6_beet_input,
+	.output = xfrm6_beet_output,
+	.owner = THIS_MODULE,
+	.encap = XFRM_MODE_BEET,
+};
+
+static int __init xfrm6_beet_init(void)
+{
+	return xfrm_register_mode(&xfrm6_beet_mode, AF_INET6);
+}
+
+static void __exit xfrm6_beet_exit(void)
+{
+	int err;
+
+	err = xfrm_unregister_mode(&xfrm6_beet_mode, AF_INET6);
+	BUG_ON(err);
+}
+
+module_init(xfrm6_beet_init);
+module_exit(xfrm6_beet_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_BEET);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index c59a78d2923a5..d54b3a70d5dfb 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -211,6 +211,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 	case XFRM_MODE_TRANSPORT:
 	case XFRM_MODE_TUNNEL:
 	case XFRM_MODE_ROUTEOPTIMIZATION:
+	case XFRM_MODE_BEET:
 		break;
 
 	default:
-- 
cgit 1.2.3-korg