OpenWrt – Rev 1

Subversion Repositories:
Rev:
From 775d6fe74d1eaec2ba387535b068dde2dc89de9e Mon Sep 17 00:00:00 2001
From: Steven Barth <steven@midlink.org>
Date: Thu, 22 May 2014 09:49:05 +0200
Subject: [PATCH] Add support for MAP-E FMRs (mesh mode)

MAP-E FMRs (draft-ietf-softwire-map-10) are rules for IPv4-communication
between MAP CEs (mesh mode) without the need to forward such data to a
border relay. This is similar to how 6rd works but for IPv4 over IPv6.

Signed-off-by: Steven Barth <cyrus@openwrt.org>
---
 include/net/ip6_tunnel.h       |  13 ++
 include/uapi/linux/if_tunnel.h |  13 ++
 net/ipv6/ip6_tunnel.c          | 276 +++++++++++++++++++++++++++++++++++++++--
 3 files changed, 291 insertions(+), 11 deletions(-)

--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -15,6 +15,18 @@
 /* determine capability on a per-packet basis */
 #define IP6_TNL_F_CAP_PER_PACKET 0x40000
 
+/* IPv6 tunnel FMR: one MAP-E mapping rule, chained into a singly linked list */
+struct __ip6_tnl_fmr {
+       struct __ip6_tnl_fmr *next; /* next fmr in list */
+       struct in6_addr ip6_prefix; /* compared with the outer IPv6 peer address */
+       struct in_addr ip4_prefix; /* compared with the inner IPv4 destination on xmit */
+
+       __u8 ip6_prefix_len; /* number of leading bits of ip6_prefix to compare */
+       __u8 ip4_prefix_len; /* number of leading bits of ip4_prefix to compare */
+       __u8 ea_len; /* ea_len - (32 - ip4_prefix_len) is the PSID length */
+       __u8 offset; /* bits skipped at the top of the port before the PSID */
+};
+
 struct __ip6_tnl_parm {
        char name[IFNAMSIZ];    /* name of tunnel device */
        int link;               /* ifindex of underlying L2 interface */
@@ -25,6 +37,7 @@ struct __ip6_tnl_parm {
        __u32 flags;            /* tunnel flags */
        struct in6_addr laddr;  /* local tunnel end-point address */
        struct in6_addr raddr;  /* remote tunnel end-point address */
+       struct __ip6_tnl_fmr *fmrs;     /* FMRs */
 
        __be16                  i_flags;
        __be16                  o_flags;
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -57,10 +57,23 @@ enum {
        IFLA_IPTUN_ENCAP_FLAGS,
        IFLA_IPTUN_ENCAP_SPORT,
        IFLA_IPTUN_ENCAP_DPORT,
+       IFLA_IPTUN_FMRS,
        __IFLA_IPTUN_MAX,
 };
 #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1)
 
+enum { /* attributes nested inside each entry of IFLA_IPTUN_FMRS */
+       IFLA_IPTUN_FMR_UNSPEC,
+       IFLA_IPTUN_FMR_IP6_PREFIX, /* struct in6_addr */
+       IFLA_IPTUN_FMR_IP4_PREFIX, /* struct in_addr */
+       IFLA_IPTUN_FMR_IP6_PREFIX_LEN, /* u8 */
+       IFLA_IPTUN_FMR_IP4_PREFIX_LEN, /* u8 */
+       IFLA_IPTUN_FMR_EA_LEN, /* u8 */
+       IFLA_IPTUN_FMR_OFFSET, /* u8; ip6_tunnel uses 6 when absent */
+       __IFLA_IPTUN_FMR_MAX,
+};
+#define IFLA_IPTUN_FMR_MAX (__IFLA_IPTUN_FMR_MAX - 1)
+
 enum tunnel_encap_types {
        TUNNEL_ENCAP_NONE,
        TUNNEL_ENCAP_FOU,
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -16,6 +16,8 @@
  *      as published by the Free Software Foundation; either version
  *      2 of the License, or (at your option) any later version.
  *
+ *     Changes:
+ * Steven Barth <cyrus@openwrt.org>:           MAP-E FMR support
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -77,11 +79,9 @@ static bool log_ecn_error = true;
 module_param(log_ecn_error, bool, 0644);
 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
-static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
+static u32 HASH(const struct in6_addr *addr)
 {
-       u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
-
-       return hash_32(hash, HASH_SIZE_SHIFT);
+       return hash_32(ipv6_addr_hash(addr), HASH_SIZE_SHIFT);
 }
 
 static int ip6_tnl_dev_init(struct net_device *dev);
@@ -180,15 +180,24 @@ EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);
 static struct ip6_tnl *
 ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
 {
-       unsigned int hash = HASH(remote, local);
+       unsigned int hash = HASH(local);
        struct ip6_tnl *t;
        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+       struct __ip6_tnl_fmr *fmr;
 
        for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
-               if (ipv6_addr_equal(local, &t->parms.laddr) &&
-                   ipv6_addr_equal(remote, &t->parms.raddr) &&
-                   (t->dev->flags & IFF_UP))
+               if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+                               !(t->dev->flags & IFF_UP))
+                       continue;
+
+               if (ipv6_addr_equal(remote, &t->parms.raddr))
                        return t;
+
+               for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) {
+                       if (ipv6_prefix_equal(remote, &fmr->ip6_prefix,
+                                       fmr->ip6_prefix_len))
+                               return t;
+               }
        }
        t = rcu_dereference(ip6n->tnls_wc[0]);
        if (t && (t->dev->flags & IFF_UP))
@@ -218,7 +227,7 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n,
 
        if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
                prio = 1;
-               h = HASH(remote, local);
+               h = HASH(local);
        }
        return &ip6n->tnls[prio][h];
 }
@@ -391,6 +400,12 @@ ip6_tnl_dev_uninit(struct net_device *de
        struct net *net = t->net;
        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 
+       while (t->parms.fmrs) {
+               struct __ip6_tnl_fmr *next = t->parms.fmrs->next;
+               kfree(t->parms.fmrs);
+               t->parms.fmrs = next;
+       }
+
        if (dev == ip6n->fb_tnl_dev)
                RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
        else
@@ -784,6 +799,108 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
 }
 EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
 
+
+/**
+ * ip4ip6_fmr_calc - calculate target / source IPv6-address based on FMR
+ *   @dest: IPv6 address buffer; left untouched if no address can be derived
+ *   @iph: inner IPv4 header of the packet
+ *   @end: first byte past the packet data; bounds every header access
+ *   @fmr: MAP FMR
+ *   @xmit: true for the xmit path, false for the rcv path
+ **/
+static void ip4ip6_fmr_calc(struct in6_addr *dest,
+               const struct iphdr *iph, const uint8_t *end,
+               const struct __ip6_tnl_fmr *fmr, bool xmit)
+{
+       int psidlen = fmr->ea_len - (32 - fmr->ip4_prefix_len);
+       u8 *portp = NULL;
+       bool use_dest_addr;
+       const struct iphdr *dsth = iph;
+
+       if ((u8*)dsth >= end)
+               return;
+
+       /* find significant IP header */
+       if (iph->protocol == IPPROTO_ICMP) {
+               struct icmphdr *ih = (struct icmphdr*)(((u8*)dsth) + dsth->ihl * 4);
+               if (ih && ((u8*)&ih[1]) <= end && (
+                       ih->type == ICMP_DEST_UNREACH ||
+                       ih->type == ICMP_SOURCE_QUENCH ||
+                       ih->type == ICMP_TIME_EXCEEDED ||
+                       ih->type == ICMP_PARAMETERPROB ||
+                       ih->type == ICMP_REDIRECT))
+                               dsth = (const struct iphdr*)&ih[1];
+       }
+
+       /* in xmit-path use dest port by default and source port only if
+               this is an ICMP reply to something else; vice versa in rcv-path */
+       use_dest_addr = (xmit && dsth == iph) || (!xmit && dsth != iph);
+
+       /* get the port (or ICMP identifier) the PSID is taken from */
+       if (((u8*)&dsth[1]) <= end && (
+               dsth->protocol == IPPROTO_UDP ||
+               dsth->protocol == IPPROTO_TCP ||
+               dsth->protocol == IPPROTO_SCTP ||
+               dsth->protocol == IPPROTO_DCCP)) {
+                       /* source and dest port follow the IPv4 header directly */
+                       portp = ((u8*)dsth) + dsth->ihl * 4;
+
+                       if (use_dest_addr)
+                               portp += sizeof(u16);
+       } else if (((u8*)&dsth[1]) <= end && dsth->protocol == IPPROTO_ICMP) {
+               struct icmphdr *ih = (struct icmphdr*)(((u8*)dsth) + dsth->ihl * 4);
+
+               /* use icmp identifier as port; the ICMP header must fit before end */
+               if (((u8*)&ih[1]) <= end && (
+                       (use_dest_addr && (
+                       ih->type == ICMP_ECHOREPLY ||
+                       ih->type == ICMP_TIMESTAMPREPLY ||
+                       ih->type == ICMP_INFO_REPLY ||
+                       ih->type == ICMP_ADDRESSREPLY)) ||
+                       (!use_dest_addr && (
+                       ih->type == ICMP_ECHO ||
+                       ih->type == ICMP_TIMESTAMP ||
+                       ih->type == ICMP_INFO_REQUEST ||
+                       ih->type == ICMP_ADDRESS)
+                       )))
+                               portp = (u8*)&ih->un.echo.id;
+       }
+
+       if ((portp && &portp[2] <= end) || psidlen == 0) {
+               int frombyte = fmr->ip6_prefix_len / 8;
+               int fromrem = fmr->ip6_prefix_len % 8;
+               int bytes = sizeof(struct in6_addr) - frombyte;
+               const u32 *addr = (use_dest_addr) ? &iph->daddr : &iph->saddr;
+               u64 eabits = ((u64)ntohl(*addr)) << (32 + fmr->ip4_prefix_len);
+               u64 t = 0;
+
+               /* extract PSID from port and add it to eabits */
+               u16 psidbits = 0;
+               if (psidlen > 0) {
+                       psidbits = ((u16)portp[0]) << 8 | ((u16)portp[1]);
+                       psidbits >>= 16 - psidlen - fmr->offset;
+                       psidbits = (u16)(psidbits << (16 - psidlen));
+                       eabits |= ((u64)psidbits) << (48 - (fmr->ea_len - psidlen));
+               }
+
+               /* rewrite destination address */
+               *dest = fmr->ip6_prefix;
+               memcpy(&dest->s6_addr[10], addr, sizeof(*addr));
+               dest->s6_addr16[7] = htons(psidbits >> (16 - psidlen));
+
+               if (bytes > sizeof(u64))
+                       bytes = sizeof(u64);
+
+               /* insert eabits */
+               memcpy(&t, &dest->s6_addr[frombyte], bytes);
+               t = be64_to_cpu(t) & ~(((((u64)1) << fmr->ea_len) - 1)
+                       << (64 - fmr->ea_len - fromrem));
+               t = cpu_to_be64(t | (eabits >> fromrem));
+               memcpy(&dest->s6_addr[frombyte], &t, bytes);
+       }
+}
+
+
 /**
  * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
  *   @skb: received socket buffer
@@ -828,6 +945,26 @@ static int ip6_tnl_rcv(struct sk_buff *s
                skb_reset_network_header(skb);
                skb->protocol = htons(protocol);
                memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+               if (protocol == ETH_P_IP && t->parms.fmrs &&
+                       !ipv6_addr_equal(&ipv6h->saddr, &t->parms.raddr)) {
+                               /* Packet didn't come from BR, so lookup FMR */
+                               struct __ip6_tnl_fmr *fmr;
+                               struct in6_addr expected = t->parms.raddr;
+                               for (fmr = t->parms.fmrs; fmr; fmr = fmr->next)
+                                       if (ipv6_prefix_equal(&ipv6h->saddr,
+                                               &fmr->ip6_prefix, fmr->ip6_prefix_len))
+                                                       break;
+
+                               /* Check that IPv6 matches IPv4 source to prevent spoofing */
+                               if (fmr)
+                                       ip4ip6_fmr_calc(&expected, ip_hdr(skb),
+                                                       skb_tail_pointer(skb), fmr, false);
+
+                               if (!ipv6_addr_equal(&ipv6h->saddr, &expected)) {
+                                       rcu_read_unlock();
+                                       goto discard;
+                               }
+               }
 
                __skb_tunnel_rx(skb, t->dev, t->net);
 
@@ -1089,6 +1226,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, str
        __u8 dsfield;
        __u32 mtu;
        int err;
+       struct __ip6_tnl_fmr *fmr;
 
        /* ensure we can access the full inner ip header */
        if (!pskb_may_pull(skb, sizeof(struct iphdr)))
@@ -1114,6 +1252,18 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, str
        if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
                fl6.flowi6_mark = skb->mark;
 
+       /* try to find matching FMR */
+       for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) {
+               unsigned mshift = 32 - fmr->ip4_prefix_len;
+               if (ntohl(fmr->ip4_prefix.s_addr) >> mshift ==
+                               ntohl(iph->daddr) >> mshift)
+                       break;
+       }
+
+       /* change dstaddr according to FMR */
+       if (fmr)
+               ip4ip6_fmr_calc(&fl6.daddr, iph, skb_tail_pointer(skb), fmr, true);
+
        err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
        if (err != 0) {
                /* XXX: send ICMP error even if DF is not set. */
@@ -1286,6 +1436,14 @@ ip6_tnl_change(struct ip6_tnl *t, const
        t->parms.flowinfo = p->flowinfo;
        t->parms.link = p->link;
        t->parms.proto = p->proto;
+
+       while (t->parms.fmrs) {
+               struct __ip6_tnl_fmr *next = t->parms.fmrs->next;
+               kfree(t->parms.fmrs);
+               t->parms.fmrs = next;
+       }
+       t->parms.fmrs = p->fmrs;
+
        ip6_tnl_dst_reset(t);
        ip6_tnl_link_config(t);
        return 0;
@@ -1316,6 +1474,7 @@ ip6_tnl_parm_from_user(struct __ip6_tnl_
        p->flowinfo = u->flowinfo;
        p->link = u->link;
        p->proto = u->proto;
+       p->fmrs = NULL;
        memcpy(p->name, u->name, sizeof(u->name));
 }
 
@@ -1591,6 +1750,15 @@ static int ip6_tnl_validate(struct nlatt
        return 0;
 }
 
+static const struct nla_policy ip6_tnl_fmr_policy[IFLA_IPTUN_FMR_MAX + 1] = {
+       [IFLA_IPTUN_FMR_IP6_PREFIX]     = { .len = sizeof(struct in6_addr) }, /* address blob */
+       [IFLA_IPTUN_FMR_IP4_PREFIX]     = { .len = sizeof(struct in_addr) },  /* address blob */
+       [IFLA_IPTUN_FMR_IP6_PREFIX_LEN] = { .type = NLA_U8 },
+       [IFLA_IPTUN_FMR_IP4_PREFIX_LEN] = { .type = NLA_U8 },
+       [IFLA_IPTUN_FMR_EA_LEN]         = { .type = NLA_U8 },
+       [IFLA_IPTUN_FMR_OFFSET]         = { .type = NLA_U8 },
+};
+
 static void ip6_tnl_netlink_parms(struct nlattr *data[],
                                  struct __ip6_tnl_parm *parms)
 {
@@ -1624,6 +1792,46 @@ static void ip6_tnl_netlink_parms(struct
 
        if (data[IFLA_IPTUN_PROTO])
                parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+
+       if (data[IFLA_IPTUN_FMRS]) {
+               unsigned rem;
+               struct nlattr *fmr;
+               nla_for_each_nested(fmr, data[IFLA_IPTUN_FMRS], rem) {
+                       struct nlattr *fmrd[IFLA_IPTUN_FMR_MAX + 1], *c;
+                       struct __ip6_tnl_fmr *nfmr;
+
+                       nla_parse_nested(fmrd, IFLA_IPTUN_FMR_MAX,
+                               fmr, ip6_tnl_fmr_policy);
+
+                       if (!(nfmr = kzalloc(sizeof(*nfmr), GFP_KERNEL)))
+                               continue;
+
+                       nfmr->offset = 6;
+
+                       if ((c = fmrd[IFLA_IPTUN_FMR_IP6_PREFIX]))
+                               nla_memcpy(&nfmr->ip6_prefix, fmrd[IFLA_IPTUN_FMR_IP6_PREFIX],
+                                       sizeof(nfmr->ip6_prefix));
+
+                       if ((c = fmrd[IFLA_IPTUN_FMR_IP4_PREFIX]))
+                               nla_memcpy(&nfmr->ip4_prefix, fmrd[IFLA_IPTUN_FMR_IP4_PREFIX],
+                                       sizeof(nfmr->ip4_prefix));
+
+                       if ((c = fmrd[IFLA_IPTUN_FMR_IP6_PREFIX_LEN]))
+                               nfmr->ip6_prefix_len = nla_get_u8(c);
+
+                       if ((c = fmrd[IFLA_IPTUN_FMR_IP4_PREFIX_LEN]))
+                               nfmr->ip4_prefix_len = nla_get_u8(c);
+
+                       if ((c = fmrd[IFLA_IPTUN_FMR_EA_LEN]))
+                               nfmr->ea_len = nla_get_u8(c);
+
+                       if ((c = fmrd[IFLA_IPTUN_FMR_OFFSET]))
+                               nfmr->offset = nla_get_u8(c);
+
+                       nfmr->next = parms->fmrs;
+                       parms->fmrs = nfmr;
+               }
+       }
 }
 
 static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
@@ -1676,6 +1884,12 @@ static void ip6_tnl_dellink(struct net_d
 
 static size_t ip6_tnl_get_size(const struct net_device *dev)
 {
+       const struct ip6_tnl *t = netdev_priv(dev);
+       struct __ip6_tnl_fmr *c;
+       int fmrs = 0;
+       for (c = t->parms.fmrs; c; c = c->next)
+               ++fmrs;
+
        return
                /* IFLA_IPTUN_LINK */
                nla_total_size(4) +
@@ -1693,6 +1907,24 @@ static size_t ip6_tnl_get_size(const str
                nla_total_size(4) +
                /* IFLA_IPTUN_PROTO */
                nla_total_size(1) +
+               /* IFLA_IPTUN_FMRS */
+               nla_total_size(0) +
+               (
+                       /* nest */
+                       nla_total_size(0) +
+                       /* IFLA_IPTUN_FMR_IP6_PREFIX */
+                       nla_total_size(sizeof(struct in6_addr)) +
+                       /* IFLA_IPTUN_FMR_IP4_PREFIX */
+                       nla_total_size(sizeof(struct in_addr)) +
+                       /* IFLA_IPTUN_FMR_EA_LEN */
+                       nla_total_size(1) +
+                       /* IFLA_IPTUN_FMR_IP6_PREFIX_LEN */
+                       nla_total_size(1) +
+                       /* IFLA_IPTUN_FMR_IP4_PREFIX_LEN */
+                       nla_total_size(1) +
+                       /* IFLA_IPTUN_FMR_OFFSET */
+                       nla_total_size(1)
+               ) * fmrs +
                0;
 }
 
@@ -1700,6 +1932,9 @@ static int ip6_tnl_fill_info(struct sk_b
 {
        struct ip6_tnl *tunnel = netdev_priv(dev);
        struct __ip6_tnl_parm *parm = &tunnel->parms;
+       struct __ip6_tnl_fmr *c;
+       int fmrcnt = 0;
+       struct nlattr *fmrs;
 
        if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
            nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
@@ -1710,8 +1945,27 @@ static int ip6_tnl_fill_info(struct sk_b
            nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
            nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
            nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
-           nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
+           nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) ||
+           !(fmrs = nla_nest_start(skb, IFLA_IPTUN_FMRS)))
                goto nla_put_failure;
+
+       for (c = parm->fmrs; c; c = c->next) {
+               struct nlattr *fmr = nla_nest_start(skb, ++fmrcnt);
+               if (!fmr ||
+                       nla_put(skb, IFLA_IPTUN_FMR_IP6_PREFIX,
+                               sizeof(c->ip6_prefix), &c->ip6_prefix) ||
+                       nla_put(skb, IFLA_IPTUN_FMR_IP4_PREFIX,
+                               sizeof(c->ip4_prefix), &c->ip4_prefix) ||
+                       nla_put_u8(skb, IFLA_IPTUN_FMR_IP6_PREFIX_LEN, c->ip6_prefix_len) ||
+                       nla_put_u8(skb, IFLA_IPTUN_FMR_IP4_PREFIX_LEN, c->ip4_prefix_len) ||
+                       nla_put_u8(skb, IFLA_IPTUN_FMR_EA_LEN, c->ea_len) ||
+                       nla_put_u8(skb, IFLA_IPTUN_FMR_OFFSET, c->offset))
+                               goto nla_put_failure;
+
+               nla_nest_end(skb, fmr);
+       }
+       nla_nest_end(skb, fmrs);
+
        return 0;
 
 nla_put_failure:
@@ -1727,6 +1981,7 @@ static const struct nla_policy ip6_tnl_p
        [IFLA_IPTUN_FLOWINFO]           = { .type = NLA_U32 },
        [IFLA_IPTUN_FLAGS]              = { .type = NLA_U32 },
        [IFLA_IPTUN_PROTO]              = { .type = NLA_U8 },
+       [IFLA_IPTUN_FMRS]               = { .type = NLA_NESTED },
 };
 
 static struct rtnl_link_ops ip6_link_ops __read_mostly = {